< prev index next >

src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java

Print this page
rev 60227 : 8224225: Tokenizer improvements
Reviewed-by: jlaskey
   1 /*
   2  * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 
  28 import com.sun.tools.javac.code.Lint;
  29 import com.sun.tools.javac.code.Lint.LintCategory;
  30 import com.sun.tools.javac.code.Preview;
  31 import com.sun.tools.javac.code.Source;
  32 import com.sun.tools.javac.code.Source.Feature;

  33 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
  34 import com.sun.tools.javac.resources.CompilerProperties.Errors;
  35 import com.sun.tools.javac.resources.CompilerProperties.Warnings;
  36 import com.sun.tools.javac.util.*;
  37 import com.sun.tools.javac.util.JCDiagnostic.*;
  38 
  39 import java.lang.reflect.InvocationTargetException;
  40 import java.lang.reflect.Method;
  41 import java.nio.CharBuffer;
  42 import java.util.HashSet;
  43 import java.util.Set;

  44 
  45 import static com.sun.tools.javac.parser.Tokens.*;
  46 import static com.sun.tools.javac.util.LayoutCharacters.*;
  47 
  48 /** The lexical analyzer maps an input stream consisting of
  49  *  ASCII characters and Unicode escapes into a token sequence.

  50  *
  51  *  <p><b>This is NOT part of any supported API.
  52  *  If you write code that depends on this, you do so at your own risk.
  53  *  This code and its internal interfaces are subject to change or
  54  *  deletion without notice.</b>
  55  */
  56 public class JavaTokenizer {
  57 


  58     private static final boolean scannerDebug = false;
  59 
  60     /** The source language setting.






  61      */
  62     private Source source;
  63 
  64     /** The preview language setting. */


  65     private Preview preview;
  66 
  67     /** The log to be used for error reporting.

  68      */
  69     private final Log log;
  70 
  71     /** The token factory. */


  72     private final Tokens tokens;
  73 
  74     /** The token kind, set by nextToken().






  75      */
  76     protected TokenKind tk;
  77 
  78     /** The token's radix, set by nextToken().

  79      */
  80     protected int radix;
  81 
  82     /** The token's name, set by nextToken().

  83      */
  84     protected Name name;
  85 
  86     /** The position where a lexical error occurred;

  87      */
  88     protected int errPos = Position.NOPOS;
  89 
  90     /** The Unicode reader (low-level stream reader).
  91      */
  92     protected UnicodeReader reader;
  93 
  94     /** If is a text block
  95      */
  96     protected boolean isTextBlock;
  97 
  98     /** If contains escape sequences

  99      */
 100     protected boolean hasEscapeSequences;
 101 








 102     protected ScannerFactory fac;
 103 
 104     // The set of lint options currently in effect. It is initialized
 105     // from the context, and then is set/reset as needed by Attr as it
 106     // visits all the various parts of the trees during attribution.


 107     protected Lint lint;
 108 
 109     private static final boolean hexFloatsWork = hexFloatsWork();
 110     private static boolean hexFloatsWork() {
 111         try {
 112             Float.valueOf("0x1.0p1");
 113             return true;
 114         } catch (NumberFormatException ex) {
 115             return false;
 116         }
 117     }
 118 
 119     /**
 120      * Create a scanner from the input array.  This method might
 121      * modify the array.  To avoid copying the input array, ensure
 122      * that {@code inputLength < input.length} or
 123      * {@code input[input.length -1]} is a white space character.
 124      *
 125      * @param fac the factory which created this Scanner
 126      * @param buf the input, might be modified
 127      * Must be positive and less than or equal to input.length.
 128      */
 129     protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
 130         this(fac, new UnicodeReader(fac, buf));
 131     }
 132 
 133     protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
 134         this(fac, new UnicodeReader(fac, buf, inputLength));
 135     }
 136 
 137     protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
 138         this.fac = fac;
 139         this.log = fac.log;

 140         this.tokens = fac.tokens;
 141         this.source = fac.source;
 142         this.preview = fac.preview;
 143         this.reader = reader;
 144         this.lint = fac.lint;

 145     }
 146 






 147     protected void checkSourceLevel(int pos, Feature feature) {
 148         if (preview.isPreview(feature) && !preview.isEnabled()) {
 149             //preview feature without --preview flag, error
 150             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
 151         } else if (!feature.allowedInSource(source)) {
 152             //incompatible source level, error
 153             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
 154         } else if (preview.isPreview(feature)) {
 155             //use of preview feature, warn
 156             preview.warnPreview(pos, feature);
 157         }
 158     }
 159 
 160     /** Report an error at the given position using the provided arguments.




 161      */
 162     protected void lexError(int pos, JCDiagnostic.Error key) {
 163         log.error(pos, key);
 164         tk = TokenKind.ERROR;
 165         errPos = pos;
 166     }
 167 







 168     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
 169         log.error(flags, pos, key);
 170         tk = TokenKind.ERROR;
 171         errPos = pos;
 172     }
 173 







 174     protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
 175         DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
 176         log.warning(lc, dp, key);
 177     }
 178 
 179     /** Read next character in character or string literal and copy into sbuf.
 180      *      pos - start of literal offset
 181      *      translateEscapesNow - true if String::translateEscapes is not available
 182      *                            in the java.base libs. Occurs during bootstrapping.
 183      *      multiline - true if scanning a text block. Allows newlines to be embedded
 184      *                  in the result.
 185      */
 186     private void scanLitChar(int pos, boolean translateEscapesNow, boolean multiline) {
 187          if (reader.ch == '\\') {
 188             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
 189                 reader.skipChar();
 190                 if (!translateEscapesNow) {
 191                     reader.putChar(false);
 192                 }
 193                 reader.putChar(true);
 194             } else {
 195                 reader.nextChar(translateEscapesNow);
 196                 switch (reader.ch) {
 197                 case '0': case '1': case '2': case '3':
 198                 case '4': case '5': case '6': case '7':
 199                     char leadch = reader.ch;
 200                     int oct = reader.digit(pos, 8);
 201                     reader.nextChar(translateEscapesNow);
 202                     if ('0' <= reader.ch && reader.ch <= '7') {
 203                         oct = oct * 8 + reader.digit(pos, 8);
 204                         reader.nextChar(translateEscapesNow);
 205                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
 206                             oct = oct * 8 + reader.digit(pos, 8);
 207                             reader.nextChar(translateEscapesNow);
 208                         }
 209                     }
 210                     if (translateEscapesNow) {
 211                         reader.putChar((char)oct);






 212                     }
 213                     break;
 214                 case 'b':
 215                     reader.putChar(translateEscapesNow ? '\b' : 'b', true); break;
 216                 case 't':
 217                     reader.putChar(translateEscapesNow ? '\t' : 't', true); break;
 218                 case 'n':
 219                     reader.putChar(translateEscapesNow ? '\n' : 'n', true); break;
 220                 case 'f':
 221                     reader.putChar(translateEscapesNow ? '\f' : 'f', true); break;
 222                 case 'r':
 223                     reader.putChar(translateEscapesNow ? '\r' : 'r', true); break;
 224                 case '\'':
 225                 case '\"':
 226                 case '\\':
 227                     reader.putChar(true); break;
 228                 case 's':
 229                     checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
 230                     reader.putChar(translateEscapesNow ? ' ' : 's', true); break;
 231                 case '\n':
 232                 case '\r':
 233                     if (!multiline) {
 234                         lexError(reader.bp, Errors.IllegalEscChar);
 235                     } else {
 236                         checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
 237                         int start = reader.bp;
 238                         if (reader.ch == '\r' && reader.peekChar() == '\n') {
 239                            reader.nextChar(translateEscapesNow);
 240                         }
 241                         reader.nextChar(translateEscapesNow);
 242                         processLineTerminator(start, reader.bp);
 243                     }
 244                     break;
 245                 default:
 246                     lexError(reader.bp, Errors.IllegalEscChar);
 247                 }
 248             }
 249         } else if (reader.bp != reader.buflen) {
 250             reader.putChar(true);
 251         }
 252     }
 253 
 254     /** Interim access to String methods used to support text blocks.
 255      *  Required to handle bootstrapping with pre-text block jdks.
 256      *  Should be replaced with direct calls in the 'next' jdk.
 257      */
 258     static class TextBlockSupport {
 259         /** Reflection method to remove incidental indentation.
 260          */
 261         private static final Method stripIndent;


 262 
 263         /** Reflection method to translate escape sequences.

 264          */
 265         private static final Method translateEscapes;

 266 
 267         /** true if stripIndent and translateEscapes are available in the bootstrap jdk.
 268          */
 269         private static final boolean hasSupport;
 270 
 271         /** Get a string method via refection or null if not available.






 272          */
 273         private static Method getStringMethodOrNull(String name) {
 274             try {
 275                 return String.class.getMethod(name);
 276             } catch (Exception ex) {
 277                 // Method not available, return null.
 278             }
 279             return null;
 280         }
 281 
 282         static {
 283             // Get text block string methods.
 284             stripIndent = getStringMethodOrNull("stripIndent");
 285             translateEscapes = getStringMethodOrNull("translateEscapes");
 286             // true if stripIndent and translateEscapes are available in the bootstrap jdk.
 287             hasSupport = stripIndent != null && translateEscapes != null;
 288         }
 289 
 290         /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
 291          */
 292         static boolean hasSupport() {
 293             return hasSupport;
 294         }
 295 
 296         /** Return the leading whitespace count (indentation) of the line.
 297          */
 298         private static int indent(String line) {
 299             return line.length() - line.stripLeading().length();
 300         }
 301 
 302         enum WhitespaceChecks {
 303             INCONSISTENT,
 304             TRAILING
 305         };
 306 
 307         /** Check that the use of white space in content is not problematic.
 308          */
 309         static Set<WhitespaceChecks> checkWhitespace(String string) {
 310             // Start with empty result set.
 311             Set<WhitespaceChecks> checks = new HashSet<>();
 312             // No need to check empty strings.
 313             if (string.isEmpty()) {
 314                 return checks;
 315             }
 316             // Maximum common indentation.
 317             int outdent = 0;
 318             // No need to check indentation if opting out (last line is empty.)
 319             char lastChar = string.charAt(string.length() - 1);
 320             boolean optOut = lastChar == '\n' || lastChar == '\r';
 321             // Split string based at line terminators.
 322             String[] lines = string.split("\\R");
 323             int length = lines.length;
 324             // Extract last line.
 325             String lastLine = length == 0 ? "" : lines[length - 1];
 326              if (!optOut) {
 327                 // Prime with the last line indentation (may be blank.)
 328                 outdent = indent(lastLine);
 329                 for (String line : lines) {
 330                     // Blanks lines have no influence (last line accounted for.)
 331                     if (!line.isBlank()) {
 332                         outdent = Integer.min(outdent, indent(line));
 333                         if (outdent == 0) {
 334                             break;
 335                         }
 336                     }
 337                 }
 338             }
 339             // Last line is representative.
 340             String start = lastLine.substring(0, outdent);
 341             for (String line : lines) {
 342                 // Fail if a line does not have the same indentation.
 343                 if (!line.isBlank() && !line.startsWith(start)) {
 344                     // Mix of different white space
 345                     checks.add(WhitespaceChecks.INCONSISTENT);
 346                 }
 347                 // Line has content even after indent is removed.
 348                 if (outdent < line.length()) {
 349                     // Is the last character a white space.
 350                     lastChar = line.charAt(line.length() - 1);
 351                     if (Character.isWhitespace(lastChar)) {
 352                         // Has trailing white space.
 353                         checks.add(WhitespaceChecks.TRAILING);
 354                     }
 355                 }
 356             }
 357             return checks;
 358         }
 359 
 360         /** Invoke String::stripIndent through reflection.
 361          */
 362         static String stripIndent(String string) {
 363             try {
 364                 string = (String)stripIndent.invoke(string);
 365             } catch (InvocationTargetException | IllegalAccessException ex) {
 366                 throw new RuntimeException(ex);
 367             }
 368             return string;
 369         }
 370 
 371         /** Invoke String::translateEscapes through reflection.







 372          */
 373         static String translateEscapes(String string) {
 374             try {
 375                 string = (String)translateEscapes.invoke(string);
 376             } catch (InvocationTargetException | IllegalAccessException ex) {
 377                 throw new RuntimeException(ex);
 378             }
 379             return string;
 380         }


 381     }
 382 
 383     /** Test for EOLN.



 384      */
 385     private boolean isEOLN() {
 386         return reader.ch == LF || reader.ch == CR;
 387     }
 388 
 389     /** Test for CRLF.

 390      */
 391     private boolean isCRLF() {
 392         return reader.ch == CR && reader.peekChar() == LF;



 393     }
 394 
 395     /** Count and skip repeated occurrences of the specified character.





 396      */
 397     private int countChar(char ch, int max) {
 398         int count = 0;
 399         for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
 400             reader.scanChar();











 401         }
 402         return count;
 403     }

 404 
 405     /** Skip and process a line terminator.
 406      */
 407     private void skipLineTerminator() {
 408         int start = reader.bp;
 409         if (isCRLF()) {
 410             reader.scanChar();



























 411         }
 412         reader.scanChar();
 413         processLineTerminator(start, reader.bp);
 414     }
 415 
 416     /** Scan a string literal or text block.



 417      */
 418     private void scanString(int pos) {
 419         // Clear flags.
 420         isTextBlock = false;
 421         hasEscapeSequences = false;
 422         // Track the end of first line for error recovery.
 423         int firstEOLN = -1;
 424         // Attempt to scan for up to 3 double quotes.
 425         int openCount = countChar('\"', 3);
 426         switch (openCount) {
 427         case 1: // Starting a string literal.
 428             break;
 429         case 2: // Starting an empty string literal.
 430             tk = Tokens.TokenKind.STRINGLITERAL;
 431             return;
 432         case 3: // Starting a text block.




 433             // Check if preview feature is enabled for text blocks.
 434             checkSourceLevel(pos, Feature.TEXT_BLOCKS);
 435             isTextBlock = true;
 436             // Verify the open delimiter sequence.
 437             while (reader.bp < reader.buflen) {
 438                 char ch = reader.ch;
 439                 if (ch != ' ' && ch != '\t' && ch != FF) {
 440                     break;




 441                 }
 442                 reader.scanChar();




 443             }

 444             if (isEOLN()) {
 445                 skipLineTerminator();







 446             } else {
 447                 // Error if the open delimiter sequence is not
 448                 //     """<white space>*<LineTerminator>.
 449                 lexError(reader.bp, Errors.IllegalTextBlockOpen);
 450                 return;
 451             }
 452             break;
 453         }




 454         // While characters are available.
 455         while (reader.bp < reader.buflen) {
 456             // If possible close delimiter sequence.
 457             if (reader.ch == '\"') {
 458                 // Check to see if enough double quotes are present.
 459                 int closeCount = countChar('\"', openCount);
 460                 if (openCount == closeCount) {
 461                     // Good result.
 462                     tk = Tokens.TokenKind.STRINGLITERAL;
 463                     return;
 464                 }
 465                 // False alarm, add double quotes to string buffer.
 466                 reader.repeat('\"', closeCount);
 467             } else if (isEOLN()) {
 468                 // Line terminator in string literal is an error.
 469                 // Fall out to unclosed string literal error.
 470                 if (openCount == 1) {
 471                     break;
 472                 }
 473                 skipLineTerminator();
 474                 // Add line terminator to string buffer.
 475                 reader.putChar('\n', false);
 476                 // Record first line terminator for error recovery.
 477                 if (firstEOLN == -1) {
 478                     firstEOLN = reader.bp;
 479                 }
 480             } else if (reader.ch == '\\') {
 481                 // Handle escape sequences.
 482                 hasEscapeSequences = true;
 483                 // Translate escapes immediately if TextBlockSupport is not available
 484                 // during bootstrapping.
 485                 boolean translateEscapesNow = !TextBlockSupport.hasSupport();
 486                 scanLitChar(pos, translateEscapesNow, openCount != 1);
 487             } else {
 488                 // Add character to string buffer.
 489                 reader.putChar(true);

 490             }
 491         }

 492         // String ended without close delimiter sequence.
 493         lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
 494         if (firstEOLN  != -1) {
 495             // Reset recovery position to point after open delimiter sequence.
 496             reader.reset(firstEOLN);

 497         }
 498     }
 499 






 500     private void scanDigits(int pos, int digitRadix) {
 501         char saveCh;
 502         int savePos;

 503         do {
 504             if (reader.ch != '_') {
 505                 reader.putChar(false);












 506             }
 507             saveCh = reader.ch;
 508             savePos = reader.bp;
 509             reader.scanChar();
 510         } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
 511         if (saveCh == '_')
 512             lexError(savePos, Errors.IllegalUnderscore);
 513     }
 514 
 515     /** Read fractional part of hexadecimal floating point number.



 516      */
 517     private void scanHexExponentAndSuffix(int pos) {
 518         if (reader.ch == 'p' || reader.ch == 'P') {
 519             reader.putChar(true);
 520             skipIllegalUnderscores();
 521             if (reader.ch == '+' || reader.ch == '-') {
 522                 reader.putChar(true);
 523             }
 524             skipIllegalUnderscores();
 525             if (reader.digit(pos, 10) >= 0) {

 526                 scanDigits(pos, 10);
 527                 if (!hexFloatsWork)
 528                     lexError(pos, Errors.UnsupportedCrossFpLit);
 529             } else
 530                 lexError(pos, Errors.MalformedFpLit);

 531         } else {
 532             lexError(pos, Errors.MalformedFpLit);
 533         }
 534         if (reader.ch == 'f' || reader.ch == 'F') {
 535             reader.putChar(true);
 536             tk = TokenKind.FLOATLITERAL;
 537             radix = 16;
 538         } else {
 539             if (reader.ch == 'd' || reader.ch == 'D') {
 540                 reader.putChar(true);
 541             }
 542             tk = TokenKind.DOUBLELITERAL;
 543             radix = 16;
 544         }
 545     }
 546 
 547     /** Read fractional part of floating point number.



 548      */
 549     private void scanFraction(int pos) {
 550         skipIllegalUnderscores();
 551         if (reader.digit(pos, 10) >= 0) {

 552             scanDigits(pos, 10);
 553         }
 554         int sp1 = reader.sp;
 555         if (reader.ch == 'e' || reader.ch == 'E') {
 556             reader.putChar(true);

 557             skipIllegalUnderscores();
 558             if (reader.ch == '+' || reader.ch == '-') {
 559                 reader.putChar(true);
 560             }
 561             skipIllegalUnderscores();
 562             if (reader.digit(pos, 10) >= 0) {

 563                 scanDigits(pos, 10);
 564                 return;
 565             }

 566             lexError(pos, Errors.MalformedFpLit);
 567             reader.sp = sp1;
 568         }
 569     }
 570 
 571     /** Read fractional part and 'd' or 'f' suffix of floating point number.



 572      */
 573     private void scanFractionAndSuffix(int pos) {
 574         radix = 10;
 575         scanFraction(pos);
 576         if (reader.ch == 'f' || reader.ch == 'F') {
 577             reader.putChar(true);
 578             tk = TokenKind.FLOATLITERAL;
 579         } else {
 580             if (reader.ch == 'd' || reader.ch == 'D') {
 581                 reader.putChar(true);
 582             }
 583             tk = TokenKind.DOUBLELITERAL;
 584         }
 585     }
 586 
 587     /** Read fractional part and 'd' or 'f' suffix of floating point number.



 588      */
 589     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
 590         radix = 16;
 591         Assert.check(reader.ch == '.');
 592         reader.putChar(true);
 593         skipIllegalUnderscores();
 594         if (reader.digit(pos, 16) >= 0) {

 595             seendigit = true;
 596             scanDigits(pos, 16);
 597         }

 598         if (!seendigit)
 599             lexError(pos, Errors.InvalidHexNumber);
 600         else
 601             scanHexExponentAndSuffix(pos);
 602     }
 603 



 604     private void skipIllegalUnderscores() {
 605         if (reader.ch == '_') {
 606             lexError(reader.bp, Errors.IllegalUnderscore);
 607             while (reader.ch == '_')
 608                 reader.scanChar();
 609         }
 610     }
 611 
 612     /** Read a number.
 613      *  @param radix  The radix of the number; one of 2, 8, 10, 16.



 614      */
 615     private void scanNumber(int pos, int radix) {
 616         // for octal, allow base-10 digit in case it's a float literal
 617         this.radix = radix;
 618         int digitRadix = (radix == 8 ? 10 : radix);
 619         int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
 620         boolean seendigit = firstDigit >= 0;
 621         boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;

 622         if (seendigit) {
 623             scanDigits(pos, digitRadix);
 624         }
 625         if (radix == 16 && reader.ch == '.') {

 626             scanHexFractionAndSuffix(pos, seendigit);
 627         } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
 628             scanHexExponentAndSuffix(pos);
 629         } else if (digitRadix == 10 && reader.ch == '.') {
 630             reader.putChar(true);
 631             scanFractionAndSuffix(pos);
 632         } else if (digitRadix == 10 &&
 633                    (reader.ch == 'e' || reader.ch == 'E' ||
 634                     reader.ch == 'f' || reader.ch == 'F' ||
 635                     reader.ch == 'd' || reader.ch == 'D')) {
 636             scanFractionAndSuffix(pos);
 637         } else {
 638             if (!seenValidDigit) {
 639                 switch (radix) {
 640                 case 2:
 641                     lexError(pos, Errors.InvalidBinaryNumber);
 642                     break;
 643                 case 16:
 644                     lexError(pos, Errors.InvalidHexNumber);
 645                     break;
 646                 }
 647             }
 648             if (reader.ch == 'l' || reader.ch == 'L') {
 649                 reader.scanChar();
 650                 tk = TokenKind.LONGLITERAL;
 651             } else {
 652                 tk = TokenKind.INTLITERAL;
 653             }
 654         }
 655     }
 656 
 657     /** Read an identifier.









 658      */
 659     private void scanIdent() {
 660         boolean isJavaIdentifierPart;
 661         char high;
 662         reader.putChar(true);
 663         do {
 664             switch (reader.ch) {
 665             case 'A': case 'B': case 'C': case 'D': case 'E':
 666             case 'F': case 'G': case 'H': case 'I': case 'J':
 667             case 'K': case 'L': case 'M': case 'N': case 'O':
 668             case 'P': case 'Q': case 'R': case 'S': case 'T':
 669             case 'U': case 'V': case 'W': case 'X': case 'Y':
 670             case 'Z':
 671             case 'a': case 'b': case 'c': case 'd': case 'e':
 672             case 'f': case 'g': case 'h': case 'i': case 'j':
 673             case 'k': case 'l': case 'm': case 'n': case 'o':
 674             case 'p': case 'q': case 'r': case 's': case 't':
 675             case 'u': case 'v': case 'w': case 'x': case 'y':
 676             case 'z':
 677             case '$': case '_':
 678             case '0': case '1': case '2': case '3': case '4':
 679             case '5': case '6': case '7': case '8': case '9':
 680                 break;

 681             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
 682             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
 683             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
 684             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
 685             case '\u0015': case '\u0016': case '\u0017':
 686             case '\u0018': case '\u0019': case '\u001B':
 687             case '\u007F':
 688                 reader.scanChar();
 689                 continue;

 690             case '\u001A': // EOI is also a legal identifier part
 691                 if (reader.bp >= reader.buflen) {
 692                     name = reader.name();
 693                     tk = tokens.lookupKind(name);
 694                     return;
 695                 }
 696                 reader.scanChar();

 697                 continue;

 698             default:
 699                 if (reader.ch < '\u0080') {


 700                     // all ASCII range chars already handled, above
 701                     isJavaIdentifierPart = false;
 702                 } else {
 703                     if (Character.isIdentifierIgnorable(reader.ch)) {
 704                         reader.scanChar();
 705                         continue;
 706                     } else {
 707                         int codePoint = reader.peekSurrogates();
 708                         if (codePoint >= 0) {
 709                             if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
 710                                 reader.putChar(true);
 711                             }
 712                         } else {
 713                             isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
 714                         }
 715                     }




 716                 }

 717                 if (!isJavaIdentifierPart) {
 718                     name = reader.name();
 719                     tk = tokens.lookupKind(name);
 720                     return;
 721                 }
 722             }
 723             reader.putChar(true);

 724         } while (true);
 725     }
 726 
 727     /** Return true if reader.ch can be part of an operator.





 728      */
 729     private boolean isSpecial(char ch) {
 730         switch (ch) {
 731         case '!': case '%': case '&': case '*': case '?':
 732         case '+': case '-': case ':': case '<': case '=':
 733         case '>': case '^': case '|': case '~':
 734         case '@':
 735             return true;

 736         default:
 737             return false;
 738         }
 739     }
 740 
 741     /** Read longest possible sequence of special characters and convert
 742      *  to token.
 743      */
 744     private void scanOperator() {
 745         while (true) {
 746             reader.putChar(false);
 747             Name newname = reader.name();
 748             TokenKind tk1 = tokens.lookupKind(newname);
 749             if (tk1 == TokenKind.IDENTIFIER) {
 750                 reader.sp--;







 751                 break;
 752             }
 753             tk = tk1;
 754             reader.scanChar();
 755             if (!isSpecial(reader.ch)) break;
 756         }
 757     }
 758 
 759     /** Read the next token from the input.
          *
          *  <p>Resets the per-token state, then loops over the input. Whitespace,
          *  line terminators and comments are consumed and the loop continues
          *  (comments are collected through {@code addComment}); any other input
          *  leaves the loop, normally with a token kind stored in {@code tk}.
          *  The token object is then built according to {@code tk.tag}.
          *
          *  @return a {@code Token}, {@code NamedToken}, {@code StringToken} or
          *          {@code NumericToken} covering the scanned range and carrying
          *          the comments collected before it (or null if there were none).
          *  @throws AssertionError if {@code tk.tag} is not one of the handled tags.
 760      */
 761     public Token readToken() {
 762
             // Reset per-token state.
 763         reader.sp = 0;
 764         name = null;
 765         radix = 0;


 766
 767         int pos = 0;
 768         int endPos = 0;
 769         List<Comment> comments = null;
 770
 771         try {
 772             loop: while (true) {
                     // Start position of the run about to be examined.
 773                 pos = reader.bp;
 774                 switch (reader.ch) {
                     // Whitespace and line terminators: consume, notify the hook, keep looping.
 775                 case ' ': // (Spec 3.6)
 776                 case '\t': // (Spec 3.6)
 777                 case FF: // (Spec 3.6)
 778                     do {
 779                         reader.scanChar();
 780                     } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
 781                     processWhiteSpace(pos, reader.bp);
 782                     break;
 783                 case LF: // (Spec 3.4)
 784                     reader.scanChar();
 785                     processLineTerminator(pos, reader.bp);

 786                     break;
 787                 case CR: // (Spec 3.4)
 788                     reader.scanChar();
                         // CR LF is treated as a single line terminator.
 789                     if (reader.ch == LF) {
 790                         reader.scanChar();
 791                     }
 792                     processLineTerminator(pos, reader.bp);
 793                     break;
                     // ASCII characters that can start an identifier.
 794                 case 'A': case 'B': case 'C': case 'D': case 'E':
 795                 case 'F': case 'G': case 'H': case 'I': case 'J':
 796                 case 'K': case 'L': case 'M': case 'N': case 'O':
 797                 case 'P': case 'Q': case 'R': case 'S': case 'T':
 798                 case 'U': case 'V': case 'W': case 'X': case 'Y':
 799                 case 'Z':
 800                 case 'a': case 'b': case 'c': case 'd': case 'e':
 801                 case 'f': case 'g': case 'h': case 'i': case 'j':
 802                 case 'k': case 'l': case 'm': case 'n': case 'o':
 803                 case 'p': case 'q': case 'r': case 's': case 't':
 804                 case 'u': case 'v': case 'w': case 'x': case 'y':
 805                 case 'z':
 806                 case '$': case '_':
 807                     scanIdent();
 808                     break loop;
 809                 case '0':
 810                     reader.scanChar();
 811                     if (reader.ch == 'x' || reader.ch == 'X') {
 812                         reader.scanChar();
                             // Prefix 0x / 0X: scan with radix 16.
 813                         skipIllegalUnderscores();
 814                         scanNumber(pos, 16);
 815                     } else if (reader.ch == 'b' || reader.ch == 'B') {
                             // Prefix 0b / 0B: scan with radix 2.
 816                         reader.scanChar();
 817                         skipIllegalUnderscores();
 818                         scanNumber(pos, 2);
 819                     } else {
                             // Any other leading zero: buffer the '0' and scan with radix 8.
 820                         reader.putChar('0');
 821                         if (reader.ch == '_') {
 822                             int savePos = reader.bp;
 823                             do {
 824                                 reader.scanChar();
 825                             } while (reader.ch == '_');
                                 // Underscores after the leading zero must be followed by a digit.
 826                             if (reader.digit(pos, 10) < 0) {
 827                                 lexError(savePos, Errors.IllegalUnderscore);
 828                             }
 829                         }

 830                         scanNumber(pos, 8);
 831                     }
 832                     break loop;

 833                 case '1': case '2': case '3': case '4':
 834                 case '5': case '6': case '7': case '8': case '9':
 835                     scanNumber(pos, 10);
 836                     break loop;
 837                 case '.':
 838                     reader.scanChar();
 839                     if (reader.digit(pos, 10) >= 0) {
                             // A dot followed by a digit starts a fractional literal.
 840                         reader.putChar('.');
 841                         scanFractionAndSuffix(pos);
 842                     } else if (reader.ch == '.') {
 843                         int savePos = reader.bp;
 844                         reader.putChar('.'); reader.putChar('.', true);
 845                         if (reader.ch == '.') {
 846                             reader.scanChar();
 847                             reader.putChar('.');
 848                             tk = TokenKind.ELLIPSIS;
 849                         } else {
                                 // Two dots not followed by a third are reported as an illegal dot.



 850                             lexError(savePos, Errors.IllegalDot);
 851                         }


 852                     } else {
 853                         tk = TokenKind.DOT;
 854                     }











 855                     break loop;
 856                 case ',':
 857                     reader.scanChar(); tk = TokenKind.COMMA; break loop;
 858                 case ';':
 859                     reader.scanChar(); tk = TokenKind.SEMI; break loop;
 860                 case '(':
 861                     reader.scanChar(); tk = TokenKind.LPAREN; break loop;
 862                 case ')':
 863                     reader.scanChar(); tk = TokenKind.RPAREN; break loop;
 864                 case '[':
 865                     reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
 866                 case ']':
 867                     reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
 868                 case '{':
 869                     reader.scanChar(); tk = TokenKind.LBRACE; break loop;
 870                 case '}':
 871                     reader.scanChar(); tk = TokenKind.RBRACE; break loop;














                     // A slash starts a line comment, a block or doc comment, SLASHEQ or SLASH.
 872                 case '/':
 873                     reader.scanChar();
 874                     if (reader.ch == '/') {
 875                         do {
 876                             reader.scanCommentChar();
 877                         } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                             // The comment is only recorded if the scan stopped before the end of the buffer.
 878                         if (reader.bp < reader.buflen) {
 879                             comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
 880                         }
 881                         break;
 882                     } else if (reader.ch == '*') {
 883                         boolean isEmpty = false;
 884                         reader.scanChar();
 885                         CommentStyle style;
 886                         if (reader.ch == '*') {
                                 // A second star selects the JAVADOC style; if a slash follows
                                 // immediately, isEmpty makes the scan loop below a no-op.
 887                             style = CommentStyle.JAVADOC;
 888                             reader.scanCommentChar();
 889                             if (reader.ch == '/') {
 890                                 isEmpty = true;
 891                             }
 892                         } else {
 893                             style = CommentStyle.BLOCK;
 894                         }
                             // Advance until a star immediately followed by a slash, or end of buffer.
 895                         while (!isEmpty && reader.bp < reader.buflen) {
 896                             if (reader.ch == '*') {
 897                                 reader.scanChar();
 898                                 if (reader.ch == '/') break;



 899                             } else {
 900                                 reader.scanCommentChar();

 901                             }
 902                         }
 903                         if (reader.ch == '/') {
 904                             reader.scanChar();
 905                             comments = addComment(comments, processComment(pos, reader.bp, style));

 906                             break;
 907                         } else {
                                 // No closing slash was found: report and leave the token loop.
 908                             lexError(pos, Errors.UnclosedComment);

 909                             break loop;
 910                         }
 911                     } else if (reader.ch == '=') {
 912                         tk = TokenKind.SLASHEQ;
 913                         reader.scanChar();
 914                     } else {
 915                         tk = TokenKind.SLASH;
 916                     }
 917                     break loop;
 918                 case '\'':
 919                     reader.scanChar();
 920                     if (reader.ch == '\'') {
                             // Two consecutive single quotes: empty character literal.

 921                         lexError(pos, Errors.EmptyCharLit);
 922                         reader.scanChar();
 923                     } else {
 924                         if (isEOLN())
 925                             lexError(pos, Errors.IllegalLineEndInCharLit);
 926                         scanLitChar(pos, true, false);
 927                         if (reader.ch == '\'') {
 928                             reader.scanChar();


 929                             tk = TokenKind.CHARLITERAL;
 930                         } else {
 931                             lexError(pos, Errors.UnclosedCharLit);
 932                         }
 933                     }
 934                     break loop;
 935                 case '\"':

 936                     scanString(pos);
 937                     break loop;
                     // Everything else: operators, non-ASCII identifiers or digits,
                     // end of input, or an illegal character.
 938                 default:
 939                     if (isSpecial(reader.ch)) {
 940                         scanOperator();
 941                     } else {
 942                         boolean isJavaIdentifierStart;
 943                         int codePoint = -1;
 944                         if (reader.ch < '\u0080') {
 945                             // all ASCII range chars already handled, above
 946                             isJavaIdentifierStart = false;
 947                         } else {
 948                             codePoint = reader.peekSurrogates();
 949                             if (codePoint >= 0) {
 950                                 if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
 951                                     reader.putChar(true);
 952                                 }
 953                             } else {
 954                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
 955                             }
 956                         }

 957                         if (isJavaIdentifierStart) {
 958                             scanIdent();
 959                         } else if (reader.digit(pos, 10) >= 0) {
 960                             scanNumber(pos, 10);
 961                         } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
 962                             tk = TokenKind.EOF;
 963                             pos = reader.realLength;
 964                         } else {
                                 // Build the diagnostic argument: a surrogate pair is shown as two
                                 // unicode escapes, a printable ASCII character as itself, and any
                                 // other character as a single unicode escape.
 965                             String arg;
 966
 967                             if (codePoint >= 0) {
 968                                 char high = reader.ch;
 969                                 reader.scanChar();
 970                                 arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
 971                             } else {
 972                                 arg = (32 < reader.ch && reader.ch < 127) ?
 973                                                 String.format("%s", reader.ch) :
 974                                                 String.format("\\u%04x", (int)reader.ch);

 975                             }

 976                             lexError(pos, Errors.IllegalChar(arg));
 977                             reader.scanChar();
 978                         }
 979                     }
 980                     break loop;
 981                 }
 982             }
 983             endPos = reader.bp;
                 // Build the token object that matches the kind's tag.
 984             switch (tk.tag) {
 985                 case DEFAULT: return new Token(tk, pos, endPos, comments);
 986                 case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
 987                 case STRING: {



 988                     // Get characters from string buffer.
 989                     String string = reader.chars();

 990                     // If a text block.
 991                     if (isTextBlock && TextBlockSupport.hasSupport()) {
 992                         // Verify that the incidental indentation is consistent.
 993                         if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
 994                             Set<TextBlockSupport.WhitespaceChecks> checks =
 995                                     TextBlockSupport.checkWhitespace(string);
 996                             if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
 997                                 lexWarning(LintCategory.TEXT_BLOCKS, pos,
 998                                         Warnings.InconsistentWhiteSpaceIndentation);
 999                             }
1000                             if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
1001                                 lexWarning(LintCategory.TEXT_BLOCKS, pos,
1002                                         Warnings.TrailingWhiteSpaceWillBeRemoved);
1003                             }
1004                         }
1005                         // Remove incidental indentation.
1006                         try {
1007                             string = TextBlockSupport.stripIndent(string);
1008                         } catch (Exception ex) {
1009                             // Error already reported, just use unstripped string.
1010                         }
1011                     }

1012                     // Translate escape sequences if present.
1013                     if (hasEscapeSequences && TextBlockSupport.hasSupport()) {
1014                         try {
1015                             string = TextBlockSupport.translateEscapes(string);
1016                         } catch (Exception ex) {
1017                             // Error already reported, just use untranslated string.
1018                         }
1019                     }


1020                     // Build string token.
1021                     return new StringToken(tk, pos, endPos, string, comments);
1022                 }
1023                 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
1024                 default: throw new AssertionError();
1025             }
1026         }
1027         finally {
                 // Optional debug trace of the raw characters between pos and endPos;
                 // runs on every exit path, including the AssertionError above.
1028             if (scannerDebug) {
1029                     System.out.println("nextToken(" + pos
1030                                        + "," + endPos + ")=|" +
1031                                        new String(reader.getRawCharacters(pos, endPos))
1032                                        + "|");
1033             }
1034         }
1035     }
1036     //where
1037         List<Comment> addComment(List<Comment> comments, Comment comment) {








1038             return comments == null ?
1039                     List.of(comment) :
1040                     comments.prepend(comment);
1041         }
1042 
1043     /** Return the position where a lexical error occurred;



1044      */
1045     public int errPos() {
1046         return errPos;
1047     }
1048 
1049     /** Set the position where a lexical error occurred;



1050      */
1051     public void errPos(int pos) {
1052         errPos = pos;
1053     }
1054 
1055     /**
1056      * Called when a complete comment has been scanned. pos and endPos
1057      * will mark the comment boundary.






1058      */
1059     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
1060         if (scannerDebug)
1061             System.out.println("processComment(" + pos
1062                                + "," + endPos + "," + style + ")=|"
1063                                + new String(reader.getRawCharacters(pos, endPos))
1064                                + "|");
1065         char[] buf = reader.getRawCharacters(pos, endPos);
1066         return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);



1067     }
1068 
1069     /**
1070      * Called when a complete whitespace run has been scanned. pos and endPos
1071      * will mark the whitespace boundary.





1072      */
1073     protected void processWhiteSpace(int pos, int endPos) {
1074         if (scannerDebug)
1075             System.out.println("processWhitespace(" + pos
1076                                + "," + endPos + ")=|" +
1077                                new String(reader.getRawCharacters(pos, endPos))
1078                                + "|");
1079     }

1080 
1081     /**
1082      * Called when a line terminator has been processed.



1083      */
1084     protected void processLineTerminator(int pos, int endPos) {
1085         if (scannerDebug)
1086             System.out.println("processTerminator(" + pos
1087                                + "," + endPos + ")=|" +
1088                                new String(reader.getRawCharacters(pos, endPos))
1089                                + "|");
1090     }

1091 
1092     /** Build a map for translating between line numbers and
1093      * positions in the input.
1094      *
1095      * @return a LineMap */

1096     public Position.LineMap getLineMap() {
1097         return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
1098     }
1099 
1100 
1101     /**
1102     * Basic {@code Comment} implementation that keeps the comment's style and a
1103     * reader over the comment's characters. For doc comments the text is scanned
1104     * on demand (see {@code isDeprecated}) to determine whether a deprecated tag
1105     * is present. This class does not provide the comment text or source positions.
1106     */
1107     protected static class BasicComment<U extends UnicodeReader> implements Comment {
1108

             /** Style of this comment, as given to the constructor. */




1109         CommentStyle cs;
             /** Reader positioned over the characters of this comment. */
1110         U comment_reader;
1111

             /** Set by {@code scanDocComment} when a deprecated tag is recognized. */


1112         protected boolean deprecatedFlag = false;

             /** Set once {@code scanDocComment} has run, so the scan happens at most once
              *  through {@code isDeprecated}. */


1113         protected boolean scanned = false;
1114
             /**
              * Create a comment of the given style over the given reader.
              *
              * @param comment_reader  reader over the comment's characters.
              * @param cs              style of the comment.
              */
1115         protected BasicComment(U comment_reader, CommentStyle cs) {
1116             this.comment_reader = comment_reader;








1117             this.cs = cs;
1118         }
1119

             /**
              * This implementation does not provide the comment text.
              *
              * @return always null.
              */

1120         public String getText() {
1121             return null;
1122         }
1123

             /**
              * This implementation does not map positions back to the source.
              *
              * @param pos  position within the comment (ignored).
              * @return always -1.
              */

1124         public int getSourcePos(int pos) {
1125             return -1;
1126         }
1127

             /**
              * Return the style of this comment.
              *
              * @return the style passed to the constructor.
              */




1128         public CommentStyle getStyle() {
1129             return cs;
1130         }
1131

             /**
              * Tell whether this comment contains a deprecated tag. A JAVADOC-style
              * comment is scanned on the first call; other styles are never scanned.
              *
              * @return the value of {@code deprecatedFlag}.
              */
1132         public boolean isDeprecated() {
1133             if (!scanned && cs == CommentStyle.JAVADOC) {
1134                 scanDocComment();
1135             }

1136             return deprecatedFlag;
1137         }
1138
             /**
              * Scan the doc comment line by line looking for a deprecated tag.
              *
              * <p>The first three characters of the comment are skipped. On each line,
              * leading whitespace, then any stars, then whitespace again are skipped;
              * if the text that follows is the tag and it is followed by whitespace or
              * by the closing star-slash, {@code deprecatedFlag} is set. Scanning
              * returns as soon as a star immediately followed by a slash is seen, or
              * when the end of the buffer is reached. {@code scanned} is set on exit.
              */
1139         @SuppressWarnings("fallthrough")


1140         protected void scanDocComment() {
1141             try {
1142                 boolean deprecatedPrefix = false;
1143
1144                 comment_reader.bp += 3; // '/**'
1145                 comment_reader.ch = comment_reader.buf[comment_reader.bp];
1146
1147                 forEachLine:
1148                 while (comment_reader.bp < comment_reader.buflen) {
1149
1150                     // Skip optional WhiteSpace at beginning of line
1151                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
1152                         comment_reader.scanCommentChar();
1153                     }
1154
1155                     // Skip optional consecutive Stars
1156                     while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
1157                         comment_reader.scanCommentChar();
                             // A slash right after a star ends the comment.
1158                         if (comment_reader.ch == '/') {
1159                             return;
1160                         }
1161                     }
1162
1163                     // Skip optional WhiteSpace after Stars
1164                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
1165                         comment_reader.scanCommentChar();
1166                     }
1167
1168                     deprecatedPrefix = false;
1169                     // At beginning of line in the JavaDoc sense.
1170                     if (!deprecatedFlag) {
                             // Match the tag text character by character.
1171                         String deprecated = "@deprecated";
1172                         int i = 0;
1173                         while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
1174                             comment_reader.scanCommentChar();
1175                             i++;
1176                             if (i == deprecated.length()) {
1177                                 deprecatedPrefix = true;
1178                                 break;
1179                             }
1180                         }
1181                     }
1182
                         // The tag only counts when followed by whitespace or by the end of the comment.
1183                     if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
1184                         if (Character.isWhitespace(comment_reader.ch)) {
1185                             deprecatedFlag = true;
1186                         } else if (comment_reader.ch == '*') {
1187                             comment_reader.scanCommentChar();
1188                             if (comment_reader.ch == '/') {
1189                                 deprecatedFlag = true;
1190                                 return;
1191                             }
1192                         }
1193                     }
1194
1195                     // Skip rest of line
1196                     while (comment_reader.bp < comment_reader.buflen) {
1197                         switch (comment_reader.ch) {
1198                             case '*':
1199                                 comment_reader.scanCommentChar();
1200                                 if (comment_reader.ch == '/') {

1201                                     return;
1202                                 }

1203                                 break;
1204                             case CR: // (Spec 3.4)
1205                                 comment_reader.scanCommentChar();
                                     // A CR not followed by LF ends the line by itself.
1206                                 if (comment_reader.ch != LF) {
1207                                     continue forEachLine;
1208                                 }
1209                             /* fall through to LF case */
1210                             case LF: // (Spec 3.4)
1211                                 comment_reader.scanCommentChar();
1212                                 continue forEachLine;

1213                             default:
1214                                 comment_reader.scanCommentChar();

1215                         }
1216                     } // rest of line
1217                 } // forEachLine
1218                 return;
1219             } finally {
                     // Mark the comment as scanned on every exit path.
1220                 scanned = true;
1221             }
1222         }
1223     }
1224 }
   1 /*
   2  * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 
  28 import com.sun.tools.javac.code.Lint;
  29 import com.sun.tools.javac.code.Lint.LintCategory;
  30 import com.sun.tools.javac.code.Preview;
  31 import com.sun.tools.javac.code.Source;
  32 import com.sun.tools.javac.code.Source.Feature;
  33 import com.sun.tools.javac.file.JavacFileManager;
  34 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
  35 import com.sun.tools.javac.resources.CompilerProperties.Errors;
  36 import com.sun.tools.javac.resources.CompilerProperties.Warnings;
  37 import com.sun.tools.javac.util.*;
  38 import com.sun.tools.javac.util.JCDiagnostic.*;
  39 


  40 import java.nio.CharBuffer;

  41 import java.util.Set;
  42 import java.util.regex.Pattern;
  43 
  44 import static com.sun.tools.javac.parser.Tokens.*;
  45 import static com.sun.tools.javac.util.LayoutCharacters.EOI;
  46 
  47 /**
  48  * The lexical analyzer maps an input stream consisting of UTF-8 characters and unicode
  49  * escape sequences into a token sequence.
  50  *
  51  *  <p><b>This is NOT part of any supported API.
  52  *  If you write code that depends on this, you do so at your own risk.
  53  *  This code and its internal interfaces are subject to change or
  54  *  deletion without notice.</b>
  55  */
  56 public class JavaTokenizer extends UnicodeReader {
  57     /**
  58      * If true then prints token information after each nextToken().
  59      */
  60     private static final boolean scannerDebug = false;
  61 
  62     /**
  63      * Sentinel for non-value.
  64      */
  65     private int NOT_FOUND = -1;
  66 
  67     /**
  68      * The source language setting. Copied from scanner factory.
  69      */
  70     private Source source;
  71 
  72     /**
  73      * The preview language setting. Copied from scanner factory.
  74      */
  75     private Preview preview;
  76 
  77     /**
  78      * The log to be used for error reporting. Copied from scanner factory.
  79      */
  80     private final Log log;
  81 
  82     /**
  83      * The token factory. Copied from scanner factory.
  84      */
  85     private final Tokens tokens;
  86 
  87     /**
  88      * The names factory. Copied from scanner factory.
  89      */
  90     private final Names names;
  91 
  92     /**
  93      * The token kind, set by nextToken().
  94      */
  95     protected TokenKind tk;
  96 
  97     /**
  98      * The token's radix, set by nextToken().
  99      */
 100     protected int radix;
 101 
 102     /**
 103      * The token's name, set by nextToken().
 104      */
 105     protected Name name;
 106 
 107     /**
 108      * The position where a lexical error occurred;
 109      */
 110     protected int errPos = Position.NOPOS;
 111 
 112     /**
 113      * true if is a text block, set by nextToken().



 114      */
 115     protected boolean isTextBlock;
 116 
 117     /**
 118      * true if contains escape sequences, set by nextToken().
 119      */
 120     protected boolean hasEscapeSequences;
 121 
 122     /**
 123      * Buffer for building literals, used by nextToken().
 124      */
 125     protected StringBuilder sb;
 126 
 127     /**
 128      * Origin scanner factory.
 129      */
 130     protected ScannerFactory fac;
 131 
 132     /**
 133      * The set of lint options currently in effect. It is initialized
 134      * from the context, and then is set/reset as needed by Attr as it
 135      * visits all the various parts of the trees during attribution.
 136      */
 137     protected Lint lint;
 138 
 139     /**
 140      * Construct a Java token scanner from the input character buffer.
          * Delegates to the array-based constructor, passing the result of
          * {@code JavacFileManager.toArray(cb)} as the characters and
          * {@code cb.limit()} as the length.
 141      *
 142      * @param fac  the factory which created this Scanner.
 143      * @param cb   the input character buffer.
 144      */
 145     protected JavaTokenizer(ScannerFactory fac, CharBuffer cb) {
 146         this(fac, JavacFileManager.toArray(cb), cb.limit());
 147     }
 148 
 149     /**
 150      * Construct a Java token scanner from the input character array.



 151      *
 152      * @param fac     the factory which created this Scanner
 153      * @param array   the input character array.
 154      * @param length  The length of the meaningful content in the array.
 155      */
 156     protected JavaTokenizer(ScannerFactory fac, char[] array, int length) {
 157         super(fac, array, length);







 158         this.fac = fac;
 159         this.log = fac.log;
 160         this.names = fac.names;
 161         this.tokens = fac.tokens;
 162         this.source = fac.source;
 163         this.preview = fac.preview;

 164         this.lint = fac.lint;
 165         this.sb = new StringBuilder(256);
 166     }
 167 
 168     /**
 169      * Check the source level for a lexical feature.
 170      *
 171      * @param pos      position in input buffer.
 172      * @param feature  feature to verify.
 173      */
 174     protected void checkSourceLevel(int pos, Feature feature) {
 175         if (preview.isPreview(feature) && !preview.isEnabled()) {
 176             //preview feature without --preview flag, error
 177             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
 178         } else if (!feature.allowedInSource(source)) {
 179             //incompatible source level, error
 180             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
 181         } else if (preview.isPreview(feature)) {
 182             //use of preview feature, warn
 183             preview.warnPreview(pos, feature);
 184         }
 185     }
 186 
 187     /**
 188      * Report an error at the given position using the provided arguments.
 189      *
 190      * @param pos  position in input buffer.
 191      * @param key  error key to report.
 192      */
 193     protected void lexError(int pos, JCDiagnostic.Error key) {
 194         log.error(pos, key);
 195         tk = TokenKind.ERROR;
 196         errPos = pos;
 197     }
 198 
 199     /**
 200      * Report an error at the given position using the provided arguments.
 201      *
 202      * @param flags  diagnostic flags.
 203      * @param pos    position in input buffer.
 204      * @param key    error key to report.
 205      */
 206     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
 207         log.error(flags, pos, key);
 208         tk = TokenKind.ERROR;
 209         errPos = pos;
 210     }
 211 
 212     /**
 213      * Report an error at the given position using the provided arguments.
 214      *
 215      * @param lc     lint category.
 216      * @param pos    position in input buffer.
 217      * @param key    error key to report.
 218      */
 219     protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
 220         DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
 221         log.warning(lc, dp, key);
 222     }
 223 
 224     /**
 225      * Add a character to the literal buffer.
 226      *
 227      * @param ch  character to add.
 228      */
 229     protected void put(char ch) {
 230         sb.append(ch);























 231     }
 232 
 233     /**
 234      * Add a codepoint to the literal buffer.
 235      *
 236      * @param codePoint  codepoint to add.
 237      */
 238     protected void putCodePoint(int codePoint) {
 239         sb.appendCodePoint(codePoint);
 240     }
 241 
 242     /**
 243      * Add the current character to the literal buffer, or the whole codepoint when the current character is a surrogate.
 244      */
 245     protected void put() {
 246         if (isSurrogate()) {
 247             putCodePoint(getCodepoint());















 248         } else {
 249             put(get());














 250         }
 251     }
 252 
 253     /**
 254      * Append the given string to the literal buffer.




 255      */
 256     protected void put(String string) {
 257         sb.append(string);
 258     }
 259 
 260     /**
 261      * Add current character or codepoint to the literal buffer, then advance and return the result of next().
 262      */
 263     protected char putThenNext() {
 264         put();
 265
 266         return next();
 267     }

 268 
 269     /**
 270      * If the current character is ch then add it to the literal buffer and advance;
 271      * otherwise nothing is added and false is returned.
 272      *
 273      * @param ch  character to match.
 274      *
 275      * @return true if ch matches current character.
 276      */
 277     protected boolean acceptThenPut(char ch) {
 278         if (is(ch)) {
 279             put(get());
 280             next();




 281
 282             return true;











































































 283         }
 284
 285         return false;








 286     }
 287 
 288     /**
 289      * If the current character is either ch1 or ch2 then add it to the literal buffer
 290      * and advance; otherwise nothing is added and false is returned.
 291      *
 292      * @param ch1  first character to match.
 293      * @param ch2  second character to match.
 294      *
 295      * @return true if either ch1 or ch2 matches current character.
 296      */
 297     protected boolean acceptOneOfThenPut(char ch1, char ch2) {
 298         if (isOneOf(ch1, ch2)) {
 299             put(get());
 300             next();
 301
 302             return true;

 303         }
 304
 305         return false;
 306     }
 307 
 308     /**
 309      * Test if the current character is a line terminator character ('\n' or '\r').
 310      *
 311      * @return true if current character is a line terminator.
 312      */
 313     private boolean isEOLN() {
 314         return isOneOf('\n', '\r');
 315     }
 316 
 317     /**
 318      * Skip an optional '\r' followed by an optional '\n' and pass the skipped range to processLineTerminator.
 319      */
 320     private void skipLineTerminator() {
 321         int start = position();
 322         accept('\r');
 323         accept('\n');
 324         processLineTerminator(start, position());
 325     }
 326 
 327     /**
 328      * Processes the current character and places it in the literal buffer. If the current
 329      * character is a backslash then the next character is validated as a proper
 330      * escape character. Conversion of escape sequences takes place at end of readToken().
 331      *
 332      * @param pos position of the first character in literal (not referenced by this method).
 333      */
 334     private void scanLitChar(int pos) {
 335         if (acceptThenPut('\\')) {
 336             hasEscapeSequences = true;
 337
 338             switch (get()) {
 339                 case '0': case '1': case '2': case '3':
 340                 case '4': case '5': case '6': case '7':
 341                     char leadch = get();
 342                     putThenNext();
 343
 344                     if (inRange('0', '7')) {
 345                         putThenNext();
 346
 347                         if (leadch <= '3' && inRange('0', '7')) { // a third octal digit is only taken when the first one is 0-3
 348                             putThenNext();
 349                         }

 350                     }
 351                     break;
 352
 353                 case 'b':
 354                 case 't':
 355                 case 'n':
 356                 case 'f':
 357                 case 'r':
 358                 case '\'':
 359                 case '\"':
 360                 case '\\':
 361                     putThenNext();
 362                     break;
 363
 364                 case 's':
 365                     checkSourceLevel(position(), Feature.TEXT_BLOCKS); // this escape is checked against the text blocks feature
 366                     putThenNext();
 367                     break;
 368
 369                 case '\n':
 370                 case '\r':
 371                     if (isTextBlock) {
 372                         skipLineTerminator();
 373                         // Normalize line terminator.
 374                         put('\n');
 375                     } else {
 376                         lexError(position(), Errors.IllegalEscChar);
 377                     }
 378                     break;
 379
 380                 default:
 381                     lexError(position(), Errors.IllegalEscChar);
 382                     break;
 383             }
 384         } else {
 385             putThenNext();
 386         }


 387     }
 388 
 389     /**
 390      * Scan a string literal or text block, leaving its characters (escape sequences still untranslated) in the literal buffer.
 391      *
 392      * @param pos  position of the first character in literal.
 393      */
 394     private void scanString(int pos) {
 395         // Assume the best.










 396         tk = Tokens.TokenKind.STRINGLITERAL;
 397         // Track the end of first line for error recovery.
 398         int firstEOLN = NOT_FOUND;
 399         // Check for text block delimiter.
 400         isTextBlock = accept("\"\"\"");
 401
 402         if (isTextBlock) {
 403             // Check if preview feature is enabled for text blocks.
 404             checkSourceLevel(pos, Feature.TEXT_BLOCKS);
 405
 406             // Verify the open delimiter sequence.
 407             // Error if the open delimiter sequence is not """<white space>*<LineTerminator>.
 408             skipWhitespace();
 409
 410             if (isEOLN()) {
 411                 skipLineTerminator();
 412             } else {
 413                 lexError(position(), Errors.IllegalTextBlockOpen);
 414                 return;
 415             }
 416
 417             // While characters are available.
 418             while (!isEOF()) {
 419                 if (accept("\"\"\"")) {
 420                     return;
 421                 }
 422
 423                 if (isEOLN()) {
 424                     skipLineTerminator();
 425                     // Add normalized line terminator to literal buffer.
 426                     put('\n');
 427
 428                     // Record position just after the first line terminator for error recovery.
 429                     if (firstEOLN == NOT_FOUND) {
 430                         firstEOLN = position();
 431                     }
 432                 } else {
 433                     // Add character to string buffer.
 434                     scanLitChar(pos);


 435                 }

 436             }
 437         } else {
 438             // Skip first quote.
 439             next();
 440
 441             // While characters are available.
 442             while (!isEOF()) {
 443                 if (accept('\"')) {






 444                     return;
 445                 }
 446
 447                 if (isEOLN()) {

 448                     // Line terminator in string literal is an error.
 449                     // Fall out to unclosed string literal error.

 450                     break;















 451                 } else {
 452                     // Add character to string buffer.
 453                     scanLitChar(pos);
 454                 }
 455             }
 456         }
 457
 458         // String ended without close delimiter sequence.
 459         lexError(pos, isTextBlock ? Errors.UnclosedTextBlock : Errors.UnclosedStrLit);
 460
 461         if (firstEOLN  != NOT_FOUND) {
 462             // Reset recovery position to just after the first line terminator seen in the text block body.
 463             reset(firstEOLN);
 464         }
 465     }
 466 
 467     /**
 468      * Scan sequence of digits. Underscores are skipped without being added to the literal buffer; a leading or trailing underscore is reported as an error.
 469      *
 470      * @param pos         position of the first character in literal.
 471      * @param digitRadix  radix of numeric literal.
 472      */
 473     private void scanDigits(int pos, int digitRadix) {
 474         int leadingUnderscorePos = is('_') ? position() : NOT_FOUND;
 475         int trailingUnderscorePos;
 476
 477         do {
 478             if (!is('_')) {
 479                 put();
 480                 trailingUnderscorePos = NOT_FOUND;
 481             } else {
 482                 trailingUnderscorePos = position(); // stays set only if no digit follows this underscore
 483             }
 484
 485             next();
 486         } while (digit(pos, digitRadix) >= 0 || is('_'));
 487
 488         if (leadingUnderscorePos != NOT_FOUND) {
 489             lexError(leadingUnderscorePos, Errors.IllegalUnderscore);
 490         } else if (trailingUnderscorePos != NOT_FOUND) {
 491             lexError(trailingUnderscorePos, Errors.IllegalUnderscore);
 492         }






 493     }
 494 
 495     /**
 496      * Read the exponent ('p' or 'P') and the optional 'f' or 'd' suffix of a hexadecimal floating point number.
 497      *
 498      * @param pos  position of the first character in literal.
 499      */
 500     private void scanHexExponentAndSuffix(int pos) {
 501         if (acceptOneOfThenPut('p', 'P')) {

 502             skipIllegalUnderscores();
 503             acceptOneOfThenPut('+', '-');


 504             skipIllegalUnderscores();
 505
 506             if (digit(pos, 10) >= 0) {
 507                 scanDigits(pos, 10);
 508             } else {


 509                 lexError(pos, Errors.MalformedFpLit);
 510             }
 511         } else {
 512             lexError(pos, Errors.MalformedFpLit);
 513         }
 514
 515         if (acceptOneOfThenPut('f', 'F')) { // note: tk is assigned below even if lexError above set it to ERROR
 516             tk = TokenKind.FLOATLITERAL;
 517             radix = 16;
 518         } else {
 519             acceptOneOfThenPut('d', 'D');


 520             tk = TokenKind.DOUBLELITERAL;
 521             radix = 16;
 522         }
 523     }
 524 
 525     /**
 526      * Read fractional part and optional exponent ('e' or 'E') of floating point number.
 527      *
 528      * @param pos  position of the first character in literal.
 529      */
 530     private void scanFraction(int pos) {
 531         skipIllegalUnderscores();
 532
 533         if (digit(pos, 10) >= 0) {
 534             scanDigits(pos, 10);
 535         }
 536
 537         int index = sb.length(); // buffer length before any exponent characters are added
 538
 539         if (acceptOneOfThenPut('e', 'E')) {
 540             skipIllegalUnderscores();
 541             acceptOneOfThenPut('+', '-');


 542             skipIllegalUnderscores();
 543
 544             if (digit(pos, 10) >= 0) {
 545                 scanDigits(pos, 10);
 546                 return;
 547             }
 548
 549             lexError(pos, Errors.MalformedFpLit);
 550             sb.setLength(index); // drop the exponent characters added above
 551         }
 552     }
 553 
 554     /**
 555      * Read fractional part, optional exponent and 'd' or 'f' suffix of a decimal floating point number and set the token kind.
 556      *
 557      * @param pos  position of the first character in literal.
 558      */
 559     private void scanFractionAndSuffix(int pos) {
 560         radix = 10;
 561         scanFraction(pos);
 562
 563         if (acceptOneOfThenPut('f', 'F')) {
 564              tk = TokenKind.FLOATLITERAL;
 565         } else {
 566             acceptOneOfThenPut('d', 'D'); // the 'd' suffix is optional: DOUBLELITERAL either way


 567             tk = TokenKind.DOUBLELITERAL;
 568         }
 569     }
 570 
 571     /**
 572      * Read fractional part, exponent and 'd' or 'f' suffix of hexadecimal floating point number. The current character must be '.'.
 573      * @param pos        position of the first character in literal.
 574      * @param seendigit  true if a digit was already scanned before the '.'.
 575      */
 576     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
 577         radix = 16;
 578         Assert.check(is('.'));
 579         putThenNext();
 580         skipIllegalUnderscores();
 581
 582         if (digit(pos, 16) >= 0) {
 583             seendigit = true;
 584             scanDigits(pos, 16);
 585         }
 586
 587         if (!seendigit) // no digit on either side of the '.'
 588             lexError(pos, Errors.InvalidHexNumber);
 589         else
 590             scanHexExponentAndSuffix(pos);
 591     }
 592 
 593     /**
 594      * If the current character is an underscore, report an error at its position and skip the underscores.
 595      */
 596     private void skipIllegalUnderscores() {
 597         if (is('_')) {
 598             lexError(position(), Errors.IllegalUnderscore);
 599             skip('_');

 600         }
 601     }
 602 
 603     /**
 604      * Read a number and set the token kind of the literal. (Spec. 3.10)
 605      *
 606      * @param pos    position of the first character in literal.
 607      * @param radix  the radix of the number; one of 2, 8, 10, 16.
 608      */
 609     private void scanNumber(int pos, int radix) {
 610         // for octal, allow base-10 digit in case it's a float literal
 611         this.radix = radix;
 612         int digitRadix = (radix == 8 ? 10 : radix);
 613         int firstDigit = digit(pos, Math.max(10, digitRadix)); // at least radix 10, so a decimal digit is seen even when radix is 2
 614         boolean seendigit = firstDigit >= 0;
 615         boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
 616
 617         if (seendigit) {
 618             scanDigits(pos, digitRadix);
 619         }
 620
 621         if (radix == 16 && is('.')) {
 622             scanHexFractionAndSuffix(pos, seendigit);
 623         } else if (seendigit && radix == 16 && isOneOf('p', 'P')) {
 624             scanHexExponentAndSuffix(pos);
 625         } else if (digitRadix == 10 && is('.')) {
 626             putThenNext();
 627             scanFractionAndSuffix(pos);
 628         } else if (digitRadix == 10 && isOneOf('e', 'E', 'f', 'F', 'd', 'D')) {



 629             scanFractionAndSuffix(pos);
 630         } else {
 631             if (!seenValidDigit) {
 632                 switch (radix) { // only binary and hex literals are diagnosed here
 633                 case 2:
 634                     lexError(pos, Errors.InvalidBinaryNumber);
 635                     break;
 636                 case 16:
 637                     lexError(pos, Errors.InvalidHexNumber);
 638                     break;
 639                 }
 640             }
 641
 642             if (acceptOneOf('l', 'L')) { // the suffix is consumed but not added to the literal buffer
 643                 tk = TokenKind.LONGLITERAL;
 644             } else {
 645                 tk = TokenKind.INTLITERAL;
 646             }
 647         }
 648     }
 649 
 650     /**
 651      * Converts the literal buffer to a name and sets the token kind by looking that name up (keyword, operator.)
 652      */
 653     private void checkIdent() {
 654         name = names.fromString(sb.toString());
 655         tk = tokens.lookupKind(name);
 656     }
 657 
 658     /**
 659      * Read an identifier into the literal buffer, then set name and token kind through checkIdent(). (Spec. 3.8)
 660      */
 661     private void scanIdent() {
 662         putThenNext();
 663

 664         do {
 665             switch (get()) {
 666             case 'A': case 'B': case 'C': case 'D': case 'E':
 667             case 'F': case 'G': case 'H': case 'I': case 'J':
 668             case 'K': case 'L': case 'M': case 'N': case 'O':
 669             case 'P': case 'Q': case 'R': case 'S': case 'T':
 670             case 'U': case 'V': case 'W': case 'X': case 'Y':
 671             case 'Z':
 672             case 'a': case 'b': case 'c': case 'd': case 'e':
 673             case 'f': case 'g': case 'h': case 'i': case 'j':
 674             case 'k': case 'l': case 'm': case 'n': case 'o':
 675             case 'p': case 'q': case 'r': case 's': case 't':
 676             case 'u': case 'v': case 'w': case 'x': case 'y':
 677             case 'z':
 678             case '$': case '_':
 679             case '0': case '1': case '2': case '3': case '4':
 680             case '5': case '6': case '7': case '8': case '9':
 681                 break; // identifier part: leave the switch so putThenNext() below adds it
 682
 683             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
 684             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
 685             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
 686             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
 687             case '\u0015': case '\u0016': case '\u0017':
 688             case '\u0018': case '\u0019': case '\u001B':
 689             case '\u007F':
 690                 next(); // skipped without being added to the literal buffer
 691                 continue;
 692
 693             case '\u001A': // EOI is also a legal identifier part
 694                 if (isEOF()) {
 695                     checkIdent();

 696                     return;
 697                 }
 698
 699                 next();
 700                 continue;
 701
 702             default:
 703                 boolean isJavaIdentifierPart;
 704
 705                 if (isASCII()) {
 706                     // all ASCII range chars already handled, above
 707                     isJavaIdentifierPart = false;
 708                 } else {
 709                     if (Character.isIdentifierIgnorable(get())) {
 710                         next(); // ignorable character: skipped, not added to the literal buffer
 711                         continue;









 712                     }
 713
 714                     isJavaIdentifierPart = isSurrogate()
 715                             ? Character.isJavaIdentifierPart(getCodepoint())
 716                             : Character.isJavaIdentifierPart(get());
 717                 }
 718
 719                 if (!isJavaIdentifierPart) {
 720                     checkIdent();

 721                     return;
 722                 }
 723             }
 724
 725             putThenNext();
 726         } while (true);
 727     }
 728 
 729     /**
 730      * Return true if ch can be part of an operator; used to decide whether scanOperator() starts or keeps going.
 731      *
 732      * @param ch  character to check.
 733      *
 734      * @return true if ch can be part of an operator.
 735      */
 736     private boolean isSpecial(char ch) {
 737         switch (ch) {
 738         case '!': case '%': case '&': case '*': case '?':
 739         case '+': case '-': case ':': case '<': case '=':
 740         case '>': case '^': case '|': case '~':
 741         case '@':
 742             return true;
 743
 744         default:
 745             return false;
 746         }
 747     }
 748 
 749     /**
 750      * Read longest possible sequence of special characters that is still a known token and set the token kind to it.
 751      */
 752     private void scanOperator() {
 753         while (true) {
 754             put();
 755             TokenKind newtk = tokens.lookupKind(sb.toString());
 756
 757             if (newtk == TokenKind.IDENTIFIER) { // presumably what lookupKind yields when the buffer is not a known token
 758                 sb.setLength(sb.length() - 1); // drop the character just added; it is not consumed
 759                 break;
 760             }
 761
 762             tk = newtk;
 763             next();
 764
 765             if (!isSpecial(get())) {
 766                 break;
 767             }



 768         }
 769     }
 770 
 771     /**
 772      * Read and return the next token, together with the comments scanned before it (main entrypoint.)
 773      */
 774     public Token readToken() {
 775         sb.setLength(0);

 776         name = null;
 777         radix = 0;
 778         isTextBlock = false;
 779         hasEscapeSequences = false;
 780
 781         int pos;

 782         List<Comment> comments = null;
 783
 784         try {
 785             loop: while (true) {
 786                 pos = position();
 787
 788                 switch (get()) {
 789                 case ' ':  // (Spec 3.6)
 790                 case '\t': // (Spec 3.6)
 791                 case '\f': // (Spec 3.6)
 792                     skipWhitespace();
 793                     processWhiteSpace(pos, position());


 794                     break; // leaves the switch only: the loop goes on to look for a token
 795
 796                 case '\n': // (Spec 3.4)
 797                     next();
 798                     processLineTerminator(pos, position());
 799                     break;
 800
 801                 case '\r': // (Spec 3.4)
 802                     next();
 803                     accept('\n');
 804                     processLineTerminator(pos, position());

 805                     break;
 806
 807                 case 'A': case 'B': case 'C': case 'D': case 'E':
 808                 case 'F': case 'G': case 'H': case 'I': case 'J':
 809                 case 'K': case 'L': case 'M': case 'N': case 'O':
 810                 case 'P': case 'Q': case 'R': case 'S': case 'T':
 811                 case 'U': case 'V': case 'W': case 'X': case 'Y':
 812                 case 'Z':
 813                 case 'a': case 'b': case 'c': case 'd': case 'e':
 814                 case 'f': case 'g': case 'h': case 'i': case 'j':
 815                 case 'k': case 'l': case 'm': case 'n': case 'o':
 816                 case 'p': case 'q': case 'r': case 's': case 't':
 817                 case 'u': case 'v': case 'w': case 'x': case 'y':
 818                 case 'z':
 819                 case '$': case '_': // (Spec. 3.8)
 820                     scanIdent();
 821                     break loop;
 822
 823                 case '0': // (Spec. 3.10)
 824                     next();
 825
 826                     if (acceptOneOf('x', 'X')) {
 827                         skipIllegalUnderscores();
 828                         scanNumber(pos, 16);
 829                     } else if (acceptOneOf('b', 'B')) {

 830                         skipIllegalUnderscores();
 831                         scanNumber(pos, 2);
 832                     } else {
 833                         put('0');
 834
 835                         if (is('_')) {
 836                             int savePos = position();
 837                             skip('_');
 838
 839                             if (digit(pos, 10) < 0) {
 840                                 lexError(savePos, Errors.IllegalUnderscore);
 841                             }
 842                         }
 843
 844                         scanNumber(pos, 8);
 845                     }
 846                     break loop;
 847
 848                 case '1': case '2': case '3': case '4':
 849                 case '5': case '6': case '7': case '8': case '9':  // (Spec. 3.10)
 850                     scanNumber(pos, 10);
 851                     break loop;
 852
 853                 case '.': // (Spec. 3.12)
 854                     if (accept("...")) {
 855                         put("...");







 856                         tk = TokenKind.ELLIPSIS;
 857                     } else {
 858                         next();
 859                         int savePos = position();
 860
 861                         if (accept('.')) {
 862                             lexError(savePos, Errors.IllegalDot);
 863                         } else if (digit(pos, 10) >= 0) {
 864                             put('.');
 865                             scanFractionAndSuffix(pos); // (Spec. 3.10)
 866                         } else {
 867                             tk = TokenKind.DOT;
 868                         }
 869                     }
 870                     break loop;
 871
 872                 case ',': // (Spec. 3.12)
 873                     next();
 874                     tk = TokenKind.COMMA;
 875                     break loop;
 876
 877                 case ';': // (Spec. 3.12)
 878                     next();
 879                     tk = TokenKind.SEMI;
 880                     break loop;
 881
 882                 case '(': // (Spec. 3.12)
 883                     next();
 884                     tk = TokenKind.LPAREN;
 885                     break loop;
 886
 887                 case ')': // (Spec. 3.12)
 888                     next();
 889                     tk = TokenKind.RPAREN;
 890                     break loop;
 891
 892                 case '[': // (Spec. 3.12)
 893                     next();
 894                     tk = TokenKind.LBRACKET;
 895                     break loop;
 896
 897                 case ']': // (Spec. 3.12)
 898                     next();
 899                     tk = TokenKind.RBRACKET;
 900                     break loop;
 901
 902                 case '{': // (Spec. 3.12)
 903                     next();
 904                     tk = TokenKind.LBRACE;
 905                     break loop;
 906
 907                 case '}': // (Spec. 3.12)
 908                     next();
 909                     tk = TokenKind.RBRACE;
 910                     break loop;
 911
 912                 case '/':
 913                     next();
 914
 915                     if (accept('/')) { // (Spec. 3.7)
 916                         skipToEOLN();
 917
 918                         if (!isEOF()) {
 919                             comments = appendComment(comments, processComment(pos, position(), CommentStyle.LINE));
 920                         }
 921                         break; // comment consumed: keep looping to find the token itself
 922                     } else if (accept('*')) { // (Spec. 3.7)
 923                         boolean isEmpty = false;

 924                         CommentStyle style;
 925
 926                         if (accept('*')) {
 927                             style = CommentStyle.JAVADOC;
 928
 929                             if (is('/')) {
 930                                 isEmpty = true;
 931                             }
 932                         } else {
 933                             style = CommentStyle.BLOCK;
 934                         }
 935
 936                         if (!isEmpty) {
 937                             while (!isEOF()) {
 938                                 if (accept('*')) {
 939                                     if (is('/')) {
 940                                         break;
 941                                     }
 942                                 } else {
 943                                     next();
 944                                 }
 945                             }
 946                         }
 947
 948                         if (accept('/')) {
 949                             comments = appendComment(comments, processComment(pos, position(), style));
 950
 951                             break;
 952                         } else {
 953                             lexError(pos, Errors.UnclosedComment);
 954
 955                             break loop;
 956                         }
 957                     } else if (accept('=')) {
 958                         tk = TokenKind.SLASHEQ; // (Spec. 3.12)

 959                     } else {
 960                         tk = TokenKind.SLASH; // (Spec. 3.12)
 961                     }
 962                     break loop;
 963
 964                 case '\'': // (Spec. 3.10)
 965                     next();
 966
 967                     if (accept('\'')) {
 968                         lexError(pos, Errors.EmptyCharLit);

 969                     } else {
 970                         if (isEOLN()) {
 971                             lexError(pos, Errors.IllegalLineEndInCharLit);
 972                         }
 973
 974                         scanLitChar(pos);
 975
 976                         if (accept('\'')) {
 977                             tk = TokenKind.CHARLITERAL;
 978                         } else {
 979                             lexError(pos, Errors.UnclosedCharLit);
 980                         }
 981                     }
 982                     break loop;
 983
 984                 case '\"': // (Spec. 3.10)
 985                     scanString(pos);
 986                     break loop;
 987
 988                 default:
 989                     if (isSpecial(get())) {
 990                         scanOperator();
 991                     } else {
 992                         boolean isJavaIdentifierStart;
 993
 994                         if (isASCII()) {
 995                             // all ASCII range chars already handled, above
 996                             isJavaIdentifierStart = false;
 997                         } else {
 998                             isJavaIdentifierStart = isSurrogate()
 999                                     ? Character.isJavaIdentifierStart(getCodepoint())
1000                                     : Character.isJavaIdentifierStart(get());





1001                         }
1002
1003                         if (isJavaIdentifierStart) {
1004                             scanIdent();
1005                         } else if (digit(pos, 10) >= 0) {
1006                             scanNumber(pos, 10);
1007                         } else if (is((char)EOI) || isEOF()) {
1008                             tk = TokenKind.EOF;
1009                             pos = position();
1010                         } else {
1011                             String arg;
1012
1013                             if (isSurrogate()) {
1014                                 int codePoint = getCodepoint();
1015                                 char hi = Character.highSurrogate(codePoint);
1016                                 char lo = Character.lowSurrogate(codePoint);
1017                                 arg = String.format("\\u%04x\\u%04x", (int) hi, (int) lo);
1018                             } else {
1019                                 char ch = get();
1020                                 arg = (32 < ch && ch < 127) ? String.format("%s", ch) :
1021                                                               String.format("\\u%04x", (int) ch);
1022                             }
1023
1024                             lexError(pos, Errors.IllegalChar(arg));
1025                             next();
1026                         }
1027                     }
1028                     break loop;
1029                 }
1030             }
1031
1032             int endPos = position();
1033
1034             if (tk.tag == Token.Tag.DEFAULT) {
1035                 return new Token(tk, pos, endPos, comments);
1036             } else  if (tk.tag == Token.Tag.NAMED) {
1037                 return new NamedToken(tk, pos, endPos, name, comments);
1038             } else {
1039                 // Get characters from string buffer.
1040                 String string = sb.toString();
1041
1042                 // If a text block.
1043                 if (isTextBlock) {
1044                     // Verify that the incidental indentation is consistent.
1045                     if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
1046                         Set<TextBlockSupport.WhitespaceChecks> checks =
1047                                 TextBlockSupport.checkWhitespace(string);
1048                         if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
1049                             lexWarning(LintCategory.TEXT_BLOCKS, pos,
1050                                     Warnings.InconsistentWhiteSpaceIndentation);
1051                         }
1052                         if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
1053                             lexWarning(LintCategory.TEXT_BLOCKS, pos,
1054                                     Warnings.TrailingWhiteSpaceWillBeRemoved);
1055                         }
1056                     }
1057                     // Remove incidental indentation.
1058                     try {
1059                         string = string.stripIndent();
1060                     } catch (Exception ex) {
1061                         // Error already reported, just use unstripped string.
1062                     }
1063                 }
1064
1065                 // Translate escape sequences if present.
1066                 if (hasEscapeSequences) {
1067                     try {
1068                         string = string.translateEscapes();
1069                     } catch (Exception ex) {
1070                         // Error already reported, just use untranslated string.
1071                     }
1072                 }
1073
1074                 if (tk.tag == Token.Tag.STRING) {
1075                     // Build string token.
1076                     return new StringToken(tk, pos, endPos, string, comments);
1077                 } else {
1078                     // Build numeric token.
1079                     return new NumericToken(tk, pos, endPos, string, radix, comments);
1080                 }
1081             }
1082         } finally {
1083             int endPos = position();
1084
1085             if (scannerDebug) {
1086                     System.out.println("nextToken(" + pos
1087                                        + "," + endPos + ")=|" +
1088                                        new String(getRawCharacters(pos, endPos))
1089                                        + "|");
1090             }
1091         }
1092     }
1093 
1094     /**
1095      * Adds a comment to the list of comments preceding the current token; the comment is put at the front of the list.
1096      *
1097      * @param comments  existing list of comments, or null if there is none yet.
1098      * @param comment   comment to add.
1099      *
1100      * @return new list with comment prepended to the existing list.
1101      */
1102     List<Comment> appendComment(List<Comment> comments, Comment comment) {
1103         return comments == null ?
1104                 List.of(comment) :
1105                 comments.prepend(comment);
1106     }
1107 
1108     /**
1109      * Return the position where a lexical error occurred.
1110      *
1111      * @return position in the input buffer of where the error occurred.
1112      */
1113     public int errPos() {
1114         return errPos;
1115     }
1116 
1117     /**
1118      * Set the position where a lexical error occurred. The value is stored
          * as given, without validation, replacing the previously recorded one.
1119      *
1120      * @param pos  position in the input buffer of where the error occurred.
1121      */
1122     public void errPos(int pos) {
1123         errPos = pos;
1124     }
1125 
1126     /**
1127      * Called when a complete comment has been scanned. pos and endPos
1128      * will mark the comment boundary.
          *
          * When {@code scannerDebug} is set, a trace line containing the
          * boundaries, the style and the comment text is first printed to
          * {@code System.out}.
1129      *
1130      * @param pos     position of the opening / in the input buffer.
1131      * @param endPos  position + 1 of the closing / in the input buffer.
1132      * @param style   style of comment.
1133      *
1134      * @return the constructed BasicComment.
1135      */
1136     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
1137         if (scannerDebug) {
                  // Debug trace only; it does not influence the comment returned below.
1138             System.out.println("processComment(" + pos
1139                                 + "," + endPos + "," + style + ")=|"
1140                                 + new String(getRawCharacters(pos, endPos))
1141                                 + "|");
1142         }
1143 
              // Characters of the comment, as returned by getRawCharacters for pos..endPos.
1144         char[] buf = getRawCharacters(pos, endPos);
1145 
              // pos doubles as the comment's offset into the original source buffer.
1146         return new BasicComment(style, fac, buf, pos);
1147     }
1148 
1149     /**
1150      * Called when a complete whitespace run has been scanned. pos and endPos
1151      * will mark the whitespace boundary.
          *
          * This implementation only prints a trace line to {@code System.out}
          * when {@code scannerDebug} is set; otherwise it does nothing.
          * NOTE(review): being protected, this is presumably a hook for
          * subclasses to override - confirm against the subclasses.
1152      *
1153      * (Spec 3.6)
1154      *
1155      * @param pos     position in input buffer of first whitespace character.
1156      * @param endPos  position + 1 in input buffer of last whitespace character.
1157      */
1158     protected void processWhiteSpace(int pos, int endPos) {
1159         if (scannerDebug) {
1160             System.out.println("processWhitespace(" + pos
1161                                 + "," + endPos + ")=|" +
1162                                 new String(getRawCharacters(pos, endPos))
1163                                 + "|");
1164         }
1165     }
1166 
1167     /**
1168      * Called when a line terminator has been processed.
          *
          * This implementation only prints a trace line to {@code System.out}
          * when {@code scannerDebug} is set; otherwise it does nothing. Note
          * that the trace is labelled "processTerminator", which differs from
          * the method name.
1169      *
1170      * @param pos     position in input buffer of first character in sequence.
1171      * @param endPos  position + 1 in input buffer of last character in sequence.
1172      */
1173     protected void processLineTerminator(int pos, int endPos) {
1174         if (scannerDebug) {
1175             System.out.println("processTerminator(" + pos
1176                                 + "," + endPos + ")=|" +
1177                                 new String(getRawCharacters(pos, endPos))
1178                                 + "|");
1179         }
1180     }
1181 
1182     /**
1183      * Build a map for translating between line numbers and positions in the input.
          * Delegates to {@code Position.makeLineMap}, passing the result of
          * {@code getRawCharacters()} and {@code length()}.
1184      *
1185      * @return a LineMap
1186      */
1187     public Position.LineMap getLineMap() {
              // NOTE(review): the meaning of the trailing 'false' is defined by
              // Position.makeLineMap (presumably a tab-expansion switch) - confirm there.
1188         return Position.makeLineMap(getRawCharacters(), length(), false);
1189     }
1190 

1191     /**
1192      * Basic implementation of {@link Comment}, built as a reader over the
1193      * characters of a single comment. It records the comment style and, for
1194      * JAVADOC comments, determines on demand whether a deprecated tag is present
1195      * (see {@link #isDeprecated()}). The text and source-position queries are
1196      * stubs here, returning null and -1 respectively.
1197      */
1197     protected static class BasicComment extends PositionTrackingReader implements Comment {
1198         /**
1199          * Style of comment
1200          *   LINE starting with //
1201          *   BLOCK starting with /*
1202          *   JAVADOC starting with /**
1203          */
1204         CommentStyle cs;

1205 
1206         /**
1207          * true if comment contains @deprecated at beginning of a line.
              * Only updated by {@link #scanDocComment()} in this class.
1208          */
1209         protected boolean deprecatedFlag = false;
1210 
1211         /**
1212          * true if comment has been fully scanned.
              * Set when {@link #scanDocComment()} exits, whichever way it exits.
1213          */
1214         protected boolean scanned = false;
1215 
1216         /**
1217          * Constructor.
1218          *
1219          * @param cs      comment style
1220          * @param sf      Scan factory.
1221          * @param array   Array containing contents of source.
1222          * @param offset  Position offset in original source buffer.
1223          */
1224         protected BasicComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
1225             super(sf, array, offset);
1226             this.cs = cs;
1227         }
1228 
1229         /**
1230          * Return comment body text minus comment adornments or null if not scanned.
              * This implementation never produces text and always returns null.
              * NOTE(review): presumably subclasses override this - confirm.
1231          *
1232          * @return comment body text.
1233          */
1234         public String getText() {
1235             return null;
1236         }
1237 
1238         /**
1239          * Return buffer position in original buffer mapped from buffer position in comment.
              * This implementation performs no mapping and always returns -1.
              * NOTE(review): presumably subclasses override this - confirm.
1240          *
1241          * @param pos  buffer position in comment.
1242          *
1243          * @return buffer position in original buffer.
1244          */
1245         public int getSourcePos(int pos) {
1246             return -1;
1247         }
1248 
1249         /**
1250          * Return style of comment.
1251          *   LINE starting with //
1252          *   BLOCK starting with /*
1253          *   JAVADOC starting with /**
1254          *
1255          * @return the comment style held in {@code cs}.
1256          */
1257         public CommentStyle getStyle() {
1258             return cs;
1259         }
1260 
1261         /**
1262          * true if comment contains @deprecated at beginning of a line.
              * The comment is scanned lazily, on the first call, and only when
              * its style is JAVADOC; otherwise the current value of
              * {@code deprecatedFlag} is returned without scanning.
1263          *
1264          * @return true if comment contains @deprecated.
1265          */
1266         public boolean isDeprecated() {
1267             if (!scanned && cs == CommentStyle.JAVADOC) {
1268                 scanDocComment();
1269             }
1270 
1271             return deprecatedFlag;
1272         }
1273 
1274         /**
1275          * Scan JAVADOC comment for details.
              *
              * Consumes the opening delimiter, then works through the comment one
              * line at a time: leading whitespace, any run of stars and the
              * whitespace after them are skipped, and {@code @deprecated} is looked
              * for at that point. The tag only sets {@code deprecatedFlag} when it
              * is followed by whitespace or directly by the end of the comment.
              * Scanning returns when positioned at the closing '/' or when the
              * input is exhausted; {@code scanned} is set to true on every exit.
1276          */
1277         protected void scanDocComment() {
1278             try {
1279                 boolean deprecatedPrefix = false;
1280                 accept("/**");


1281 
1282                 forEachLine:
1283                 while (!isEOF()) {

1284                     // Skip optional WhiteSpace at beginning of line
1285                     skipWhitespace();


1286 
1287                     // Skip optional consecutive Stars
1288                     while (accept('*')) {
1289                         if (is('/')) {

1290                             return;
1291                         }
1292                     }
1293 
1294                     // Skip optional WhiteSpace after Stars
1295                     skipWhitespace();


1296 

1297                     // At beginning of line in the JavaDoc sense.
                          // Once deprecatedFlag is set, the || short-circuits and the
                          // tag is not looked for again on later lines.
1298                     deprecatedPrefix = deprecatedFlag || accept("@deprecated");











1299 
                          // The tag must be delimited: followed by whitespace, or directly
                          // by the star and '/' that end the comment.
1300                     if (deprecatedPrefix && !isEOF()) {
1301                         if (Character.isWhitespace(get())) {
1302                             deprecatedFlag = true;
1303                         } else if (accept('*')) {
1304                             if (is('/')) {

1305                                 deprecatedFlag = true;
1306                                 return;
1307                             }
1308                         }
1309                     }
1310 
1311                     // Skip rest of line
1312                     while (!isEOF()) {
1313                         switch (get()) {
1314                             case '*':
1315                                 next();
1316 
                                      // A star immediately followed by '/' ends the comment.
1317                                 if (is('/')) {
1318                                     return;
1319                                 }
1320 
1321                                 break;
1322                             case '\r': // (Spec 3.4)
1323                             case '\n': // (Spec 3.4)
                                      // Consume a CR, an LF, or a CR LF pair, then start the next line.
1324                                 accept('\r');
1325                                 accept('\n');




1326                                 continue forEachLine;
1327 
1328                             default:
1329                                 next();
1330                                 break;
1331                         }
1332                     } // rest of line
1333                 } // forEachLine
1334                 return;
1335             } finally {
                      // Runs on every exit path, including the early returns above.
1336                 scanned = true;
1337             }
1338         }
1339     }
1340 }
< prev index next >