1 /* 2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package com.sun.tools.javac.parser; 27 28 import com.sun.tools.javac.code.Lint; 29 import com.sun.tools.javac.code.Lint.LintCategory; 30 import com.sun.tools.javac.code.Preview; 31 import com.sun.tools.javac.code.Source; 32 import com.sun.tools.javac.code.Source.Feature; 33 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; 34 import com.sun.tools.javac.resources.CompilerProperties.Errors; 35 import com.sun.tools.javac.resources.CompilerProperties.Warnings; 36 import com.sun.tools.javac.util.*; 37 import com.sun.tools.javac.util.JCDiagnostic.*; 38 39 import java.lang.reflect.InvocationTargetException; 40 import java.lang.reflect.Method; 41 import java.nio.CharBuffer; 42 import java.util.HashSet; 43 import java.util.Set; 44 45 import static com.sun.tools.javac.parser.Tokens.*; 46 import static com.sun.tools.javac.util.LayoutCharacters.*; 47 48 /** The lexical analyzer maps an input stream consisting of 49 * ASCII characters and Unicode escapes into a token sequence. 50 * 51 * <p><b>This is NOT part of any supported API. 52 * If you write code that depends on this, you do so at your own risk. 53 * This code and its internal interfaces are subject to change or 54 * deletion without notice.</b> 55 */ 56 public class JavaTokenizer { 57 58 private static final boolean scannerDebug = false; 59 60 /** The source language setting. 61 */ 62 private Source source; 63 64 /** The preview language setting. */ 65 private Preview preview; 66 67 /** The log to be used for error reporting. 68 */ 69 private final Log log; 70 71 /** The token factory. */ 72 private final Tokens tokens; 73 74 /** The token kind, set by nextToken(). 75 */ 76 protected TokenKind tk; 77 78 /** The token's radix, set by nextToken(). 79 */ 80 protected int radix; 81 82 /** The token's name, set by nextToken(). 
83 */ 84 protected Name name; 85 86 /** The position where a lexical error occurred; 87 */ 88 protected int errPos = Position.NOPOS; 89 90 /** The Unicode reader (low-level stream reader). 91 */ 92 protected UnicodeReader reader; 93 94 /** If is a text block 95 */ 96 protected boolean isTextBlock; 97 98 /** If contains escape sequences 99 */ 100 protected boolean hasEscapeSequences; 101 102 protected ScannerFactory fac; 103 104 // The set of lint options currently in effect. It is initialized 105 // from the context, and then is set/reset as needed by Attr as it 106 // visits all the various parts of the trees during attribution. 107 protected Lint lint; 108 109 private static final boolean hexFloatsWork = hexFloatsWork(); 110 private static boolean hexFloatsWork() { 111 try { 112 Float.valueOf("0x1.0p1"); 113 return true; 114 } catch (NumberFormatException ex) { 115 return false; 116 } 117 } 118 119 /** 120 * Create a scanner from the input array. This method might 121 * modify the array. To avoid copying the input array, ensure 122 * that {@code inputLength < input.length} or 123 * {@code input[input.length -1]} is a white space character. 124 * 125 * @param fac the factory which created this Scanner 126 * @param buf the input, might be modified 127 * Must be positive and less than or equal to input.length. 
128 */ 129 protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) { 130 this(fac, new UnicodeReader(fac, buf)); 131 } 132 133 protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) { 134 this(fac, new UnicodeReader(fac, buf, inputLength)); 135 } 136 137 protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) { 138 this.fac = fac; 139 this.log = fac.log; 140 this.tokens = fac.tokens; 141 this.source = fac.source; 142 this.preview = fac.preview; 143 this.reader = reader; 144 this.lint = fac.lint; 145 } 146 147 protected void checkSourceLevel(int pos, Feature feature) { 148 if (preview.isPreview(feature) && !preview.isEnabled()) { 149 //preview feature without --preview flag, error 150 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature)); 151 } else if (!feature.allowedInSource(source)) { 152 //incompatible source level, error 153 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name)); 154 } else if (preview.isPreview(feature)) { 155 //use of preview feature, warn 156 preview.warnPreview(pos, feature); 157 } 158 } 159 160 /** Report an error at the given position using the provided arguments. 161 */ 162 protected void lexError(int pos, JCDiagnostic.Error key) { 163 log.error(pos, key); 164 tk = TokenKind.ERROR; 165 errPos = pos; 166 } 167 168 protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) { 169 log.error(flags, pos, key); 170 tk = TokenKind.ERROR; 171 errPos = pos; 172 } 173 174 protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) { 175 DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ; 176 log.warning(lc, dp, key); 177 } 178 179 /** Read next character in character or string literal and copy into sbuf. 180 * pos - start of literal offset 181 * translateEscapesNow - true if String::translateEscapes is not available 182 * in the java.base libs. Occurs during bootstrapping. 183 * multiline - true if scanning a text block. 
Allows newlines to be embedded 184 * in the result. 185 */ 186 private void scanLitChar(int pos, boolean translateEscapesNow, boolean multiline) { 187 if (reader.ch == '\\') { 188 if (reader.peekChar() == '\\' && !reader.isUnicode()) { 189 reader.skipChar(); 190 if (!translateEscapesNow) { 191 reader.putChar(false); 192 } 193 reader.putChar(true); 194 } else { 195 reader.nextChar(translateEscapesNow); 196 switch (reader.ch) { 197 case '0': case '1': case '2': case '3': 198 case '4': case '5': case '6': case '7': 199 char leadch = reader.ch; 200 int oct = reader.digit(pos, 8); 201 reader.nextChar(translateEscapesNow); 202 if ('0' <= reader.ch && reader.ch <= '7') { 203 oct = oct * 8 + reader.digit(pos, 8); 204 reader.nextChar(translateEscapesNow); 205 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { 206 oct = oct * 8 + reader.digit(pos, 8); 207 reader.nextChar(translateEscapesNow); 208 } 209 } 210 if (translateEscapesNow) { 211 reader.putChar((char)oct); 212 } 213 break; 214 case 'b': 215 reader.putChar(translateEscapesNow ? '\b' : 'b', true); break; 216 case 't': 217 reader.putChar(translateEscapesNow ? '\t' : 't', true); break; 218 case 'n': 219 reader.putChar(translateEscapesNow ? '\n' : 'n', true); break; 220 case 'f': 221 reader.putChar(translateEscapesNow ? '\f' : 'f', true); break; 222 case 'r': 223 reader.putChar(translateEscapesNow ? '\r' : 'r', true); break; 224 case '\'': 225 case '\"': 226 case '\\': 227 reader.putChar(true); break; 228 case 's': 229 checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS); 230 reader.putChar(translateEscapesNow ? 
' ' : 's', true); break; 231 case '\n': 232 case '\r': 233 if (!multiline) { 234 lexError(reader.bp, Errors.IllegalEscChar); 235 } else { 236 int start = reader.bp; 237 checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS); 238 if (reader.ch == '\r' && reader.peekChar() == '\n') { 239 reader.nextChar(translateEscapesNow); 240 } 241 reader.nextChar(translateEscapesNow); 242 processLineTerminator(start, reader.bp); 243 } 244 break; 245 default: 246 lexError(reader.bp, Errors.IllegalEscChar); 247 } 248 } 249 } else if (reader.bp != reader.buflen) { 250 reader.putChar(true); 251 } 252 } 253 254 /** Interim access to String methods used to support text blocks. 255 * Required to handle bootstrapping with pre-text block jdks. 256 * Should be replaced with direct calls in the 'next' jdk. 257 */ 258 static class TextBlockSupport { 259 /** Reflection method to remove incidental indentation. 260 */ 261 private static final Method stripIndent; 262 263 /** Reflection method to translate escape sequences. 264 */ 265 private static final Method translateEscapes; 266 267 /** true if stripIndent and translateEscapes are available in the bootstrap jdk. 268 */ 269 private static final boolean hasSupport; 270 271 /** Get a string method via refection or null if not available. 272 */ 273 private static Method getStringMethodOrNull(String name) { 274 try { 275 return String.class.getMethod(name); 276 } catch (Exception ex) { 277 // Method not available, return null. 278 } 279 return null; 280 } 281 282 static { 283 // Get text block string methods. 284 stripIndent = getStringMethodOrNull("stripIndent"); 285 translateEscapes = getStringMethodOrNull("translateEscapes"); 286 // true if stripIndent and translateEscapes are available in the bootstrap jdk. 287 hasSupport = stripIndent != null && translateEscapes != null; 288 } 289 290 /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk. 
291 */ 292 static boolean hasSupport() { 293 return hasSupport; 294 } 295 296 /** Return the leading whitespace count (indentation) of the line. 297 */ 298 private static int indent(String line) { 299 return line.length() - line.stripLeading().length(); 300 } 301 302 enum WhitespaceChecks { 303 INCONSISTENT, 304 TRAILING 305 }; 306 307 /** Check that the use of white space in content is not problematic. 308 */ 309 static Set<WhitespaceChecks> checkWhitespace(String string) { 310 // Start with empty result set. 311 Set<WhitespaceChecks> checks = new HashSet<>(); 312 // No need to check empty strings. 313 if (string.isEmpty()) { 314 return checks; 315 } 316 // Maximum common indentation. 317 int outdent = 0; 318 // No need to check indentation if opting out (last line is empty.) 319 char lastChar = string.charAt(string.length() - 1); 320 boolean optOut = lastChar == '\n' || lastChar == '\r'; 321 // Split string based at line terminators. 322 String[] lines = string.split("\\R"); 323 int length = lines.length; 324 // Extract last line. 325 String lastLine = length == 0 ? "" : lines[length - 1]; 326 if (!optOut) { 327 // Prime with the last line indentation (may be blank.) 328 outdent = indent(lastLine); 329 for (String line : lines) { 330 // Blanks lines have no influence (last line accounted for.) 331 if (!line.isBlank()) { 332 outdent = Integer.min(outdent, indent(line)); 333 if (outdent == 0) { 334 break; 335 } 336 } 337 } 338 } 339 // Last line is representative. 340 String start = lastLine.substring(0, outdent); 341 for (String line : lines) { 342 // Fail if a line does not have the same indentation. 343 if (!line.isBlank() && !line.startsWith(start)) { 344 // Mix of different white space 345 checks.add(WhitespaceChecks.INCONSISTENT); 346 } 347 // Line has content even after indent is removed. 348 if (outdent < line.length()) { 349 // Is the last character a white space. 
350 lastChar = line.charAt(line.length() - 1); 351 if (Character.isWhitespace(lastChar)) { 352 // Has trailing white space. 353 checks.add(WhitespaceChecks.TRAILING); 354 } 355 } 356 } 357 return checks; 358 } 359 360 /** Invoke String::stripIndent through reflection. 361 */ 362 static String stripIndent(String string) { 363 try { 364 string = (String)stripIndent.invoke(string); 365 } catch (InvocationTargetException | IllegalAccessException ex) { 366 throw new RuntimeException(ex); 367 } 368 return string; 369 } 370 371 /** Invoke String::translateEscapes through reflection. 372 */ 373 static String translateEscapes(String string) { 374 try { 375 string = (String)translateEscapes.invoke(string); 376 } catch (InvocationTargetException | IllegalAccessException ex) { 377 throw new RuntimeException(ex); 378 } 379 return string; 380 } 381 } 382 383 /** Test for EOLN. 384 */ 385 private boolean isEOLN() { 386 return reader.ch == LF || reader.ch == CR; 387 } 388 389 /** Test for CRLF. 390 */ 391 private boolean isCRLF() { 392 return reader.ch == CR && reader.peekChar() == LF; 393 } 394 395 /** Count and skip repeated occurrences of the specified character. 396 */ 397 private int countChar(char ch, int max) { 398 int count = 0; 399 for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) { 400 reader.scanChar(); 401 } 402 return count; 403 } 404 405 /** Scan a string literal or text block. 406 */ 407 private void scanString(int pos) { 408 // Clear flags. 409 isTextBlock = false; 410 hasEscapeSequences = false; 411 // Track the end of first line for error recovery. 412 int firstEOLN = -1; 413 // Attempt to scan for up to 3 double quotes. 414 int openCount = countChar('\"', 3); 415 switch (openCount) { 416 case 1: // Starting a string literal. 417 break; 418 case 2: // Starting an empty string literal. 419 // Start again but only consume one quote. 
420 reader.reset(pos); 421 openCount = countChar('\"', 1); 422 break; 423 case 3: // Starting a text block. 424 // Check if preview feature is enabled for text blocks. 425 checkSourceLevel(pos, Feature.TEXT_BLOCKS); 426 isTextBlock = true; 427 // Verify the open delimiter sequence. 428 boolean hasOpenEOLN = false; 429 while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) { 430 hasOpenEOLN = isEOLN(); 431 if (hasOpenEOLN) { 432 break; 433 } 434 reader.scanChar(); 435 } 436 // Error if the open delimiter sequence not is """<Whitespace>*<LineTerminator>. 437 if (!hasOpenEOLN) { 438 lexError(reader.bp, Errors.IllegalTextBlockOpen); 439 return; 440 } 441 // Skip line terminator. 442 int start = reader.bp; 443 if (isCRLF()) { 444 reader.scanChar(); 445 } 446 reader.scanChar(); 447 processLineTerminator(start, reader.bp); 448 break; 449 } 450 // While characters are available. 451 while (reader.bp < reader.buflen) { 452 // If possible close delimiter sequence. 453 if (reader.ch == '\"') { 454 // Check to see if enough double quotes are present. 455 int closeCount = countChar('\"', openCount); 456 if (openCount == closeCount) { 457 // Good result. 458 tk = Tokens.TokenKind.STRINGLITERAL; 459 return; 460 } 461 // False alarm, add double quotes to string buffer. 462 reader.repeat('\"', closeCount); 463 } else if (isEOLN()) { 464 // Line terminator in string literal is an error. 465 // Fall out to unclosed string literal error. 466 if (openCount == 1) { 467 break; 468 } 469 // Add line terminator to string buffer. 470 int start = reader.bp; 471 if (isCRLF()) { 472 reader.scanChar(); 473 } 474 reader.putChar('\n', true); 475 processLineTerminator(start, reader.bp); 476 // Record first line terminator for error recovery. 477 if (firstEOLN == -1) { 478 firstEOLN = reader.bp; 479 } 480 } else if (reader.ch == '\\') { 481 // Handle escape sequences. 
482 hasEscapeSequences = true; 483 // Translate escapes immediately if TextBlockSupport is not available 484 // during bootstrapping. 485 boolean translateEscapesNow = !TextBlockSupport.hasSupport(); 486 scanLitChar(pos, translateEscapesNow, openCount != 1); 487 } else { 488 // Add character to string buffer. 489 reader.putChar(true); 490 } 491 } 492 // String ended without close delimiter sequence. 493 lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock); 494 if (firstEOLN != -1) { 495 // Reset recovery position to point after open delimiter sequence. 496 reader.reset(firstEOLN); 497 } 498 } 499 500 private void scanDigits(int pos, int digitRadix) { 501 char saveCh; 502 int savePos; 503 do { 504 if (reader.ch != '_') { 505 reader.putChar(false); 506 } 507 saveCh = reader.ch; 508 savePos = reader.bp; 509 reader.scanChar(); 510 } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_'); 511 if (saveCh == '_') 512 lexError(savePos, Errors.IllegalUnderscore); 513 } 514 515 /** Read fractional part of hexadecimal floating point number. 516 */ 517 private void scanHexExponentAndSuffix(int pos) { 518 if (reader.ch == 'p' || reader.ch == 'P') { 519 reader.putChar(true); 520 skipIllegalUnderscores(); 521 if (reader.ch == '+' || reader.ch == '-') { 522 reader.putChar(true); 523 } 524 skipIllegalUnderscores(); 525 if (reader.digit(pos, 10) >= 0) { 526 scanDigits(pos, 10); 527 if (!hexFloatsWork) 528 lexError(pos, Errors.UnsupportedCrossFpLit); 529 } else 530 lexError(pos, Errors.MalformedFpLit); 531 } else { 532 lexError(pos, Errors.MalformedFpLit); 533 } 534 if (reader.ch == 'f' || reader.ch == 'F') { 535 reader.putChar(true); 536 tk = TokenKind.FLOATLITERAL; 537 radix = 16; 538 } else { 539 if (reader.ch == 'd' || reader.ch == 'D') { 540 reader.putChar(true); 541 } 542 tk = TokenKind.DOUBLELITERAL; 543 radix = 16; 544 } 545 } 546 547 /** Read fractional part of floating point number. 
548 */ 549 private void scanFraction(int pos) { 550 skipIllegalUnderscores(); 551 if (reader.digit(pos, 10) >= 0) { 552 scanDigits(pos, 10); 553 } 554 int sp1 = reader.sp; 555 if (reader.ch == 'e' || reader.ch == 'E') { 556 reader.putChar(true); 557 skipIllegalUnderscores(); 558 if (reader.ch == '+' || reader.ch == '-') { 559 reader.putChar(true); 560 } 561 skipIllegalUnderscores(); 562 if (reader.digit(pos, 10) >= 0) { 563 scanDigits(pos, 10); 564 return; 565 } 566 lexError(pos, Errors.MalformedFpLit); 567 reader.sp = sp1; 568 } 569 } 570 571 /** Read fractional part and 'd' or 'f' suffix of floating point number. 572 */ 573 private void scanFractionAndSuffix(int pos) { 574 radix = 10; 575 scanFraction(pos); 576 if (reader.ch == 'f' || reader.ch == 'F') { 577 reader.putChar(true); 578 tk = TokenKind.FLOATLITERAL; 579 } else { 580 if (reader.ch == 'd' || reader.ch == 'D') { 581 reader.putChar(true); 582 } 583 tk = TokenKind.DOUBLELITERAL; 584 } 585 } 586 587 /** Read fractional part and 'd' or 'f' suffix of floating point number. 588 */ 589 private void scanHexFractionAndSuffix(int pos, boolean seendigit) { 590 radix = 16; 591 Assert.check(reader.ch == '.'); 592 reader.putChar(true); 593 skipIllegalUnderscores(); 594 if (reader.digit(pos, 16) >= 0) { 595 seendigit = true; 596 scanDigits(pos, 16); 597 } 598 if (!seendigit) 599 lexError(pos, Errors.InvalidHexNumber); 600 else 601 scanHexExponentAndSuffix(pos); 602 } 603 604 private void skipIllegalUnderscores() { 605 if (reader.ch == '_') { 606 lexError(reader.bp, Errors.IllegalUnderscore); 607 while (reader.ch == '_') 608 reader.scanChar(); 609 } 610 } 611 612 /** Read a number. 613 * @param radix The radix of the number; one of 2, 8, 10, 16. 614 */ 615 private void scanNumber(int pos, int radix) { 616 // for octal, allow base-10 digit in case it's a float literal 617 this.radix = radix; 618 int digitRadix = (radix == 8 ? 
10 : radix); 619 int firstDigit = reader.digit(pos, Math.max(10, digitRadix)); 620 boolean seendigit = firstDigit >= 0; 621 boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix; 622 if (seendigit) { 623 scanDigits(pos, digitRadix); 624 } 625 if (radix == 16 && reader.ch == '.') { 626 scanHexFractionAndSuffix(pos, seendigit); 627 } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) { 628 scanHexExponentAndSuffix(pos); 629 } else if (digitRadix == 10 && reader.ch == '.') { 630 reader.putChar(true); 631 scanFractionAndSuffix(pos); 632 } else if (digitRadix == 10 && 633 (reader.ch == 'e' || reader.ch == 'E' || 634 reader.ch == 'f' || reader.ch == 'F' || 635 reader.ch == 'd' || reader.ch == 'D')) { 636 scanFractionAndSuffix(pos); 637 } else { 638 if (!seenValidDigit) { 639 switch (radix) { 640 case 2: 641 lexError(pos, Errors.InvalidBinaryNumber); 642 break; 643 case 16: 644 lexError(pos, Errors.InvalidHexNumber); 645 break; 646 } 647 } 648 if (reader.ch == 'l' || reader.ch == 'L') { 649 reader.scanChar(); 650 tk = TokenKind.LONGLITERAL; 651 } else { 652 tk = TokenKind.INTLITERAL; 653 } 654 } 655 } 656 657 /** Read an identifier. 
658 */ 659 private void scanIdent() { 660 boolean isJavaIdentifierPart; 661 char high; 662 reader.putChar(true); 663 do { 664 switch (reader.ch) { 665 case 'A': case 'B': case 'C': case 'D': case 'E': 666 case 'F': case 'G': case 'H': case 'I': case 'J': 667 case 'K': case 'L': case 'M': case 'N': case 'O': 668 case 'P': case 'Q': case 'R': case 'S': case 'T': 669 case 'U': case 'V': case 'W': case 'X': case 'Y': 670 case 'Z': 671 case 'a': case 'b': case 'c': case 'd': case 'e': 672 case 'f': case 'g': case 'h': case 'i': case 'j': 673 case 'k': case 'l': case 'm': case 'n': case 'o': 674 case 'p': case 'q': case 'r': case 's': case 't': 675 case 'u': case 'v': case 'w': case 'x': case 'y': 676 case 'z': 677 case '$': case '_': 678 case '0': case '1': case '2': case '3': case '4': 679 case '5': case '6': case '7': case '8': case '9': 680 break; 681 case '\u0000': case '\u0001': case '\u0002': case '\u0003': 682 case '\u0004': case '\u0005': case '\u0006': case '\u0007': 683 case '\u0008': case '\u000E': case '\u000F': case '\u0010': 684 case '\u0011': case '\u0012': case '\u0013': case '\u0014': 685 case '\u0015': case '\u0016': case '\u0017': 686 case '\u0018': case '\u0019': case '\u001B': 687 case '\u007F': 688 reader.scanChar(); 689 continue; 690 case '\u001A': // EOI is also a legal identifier part 691 if (reader.bp >= reader.buflen) { 692 name = reader.name(); 693 tk = tokens.lookupKind(name); 694 return; 695 } 696 reader.scanChar(); 697 continue; 698 default: 699 if (reader.ch < '\u0080') { 700 // all ASCII range chars already handled, above 701 isJavaIdentifierPart = false; 702 } else { 703 if (Character.isIdentifierIgnorable(reader.ch)) { 704 reader.scanChar(); 705 continue; 706 } else { 707 int codePoint = reader.peekSurrogates(); 708 if (codePoint >= 0) { 709 if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) { 710 reader.putChar(true); 711 } 712 } else { 713 isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch); 714 } 715 
} 716 } 717 if (!isJavaIdentifierPart) { 718 name = reader.name(); 719 tk = tokens.lookupKind(name); 720 return; 721 } 722 } 723 reader.putChar(true); 724 } while (true); 725 } 726 727 /** Return true if reader.ch can be part of an operator. 728 */ 729 private boolean isSpecial(char ch) { 730 switch (ch) { 731 case '!': case '%': case '&': case '*': case '?': 732 case '+': case '-': case ':': case '<': case '=': 733 case '>': case '^': case '|': case '~': 734 case '@': 735 return true; 736 default: 737 return false; 738 } 739 } 740 741 /** Read longest possible sequence of special characters and convert 742 * to token. 743 */ 744 private void scanOperator() { 745 while (true) { 746 reader.putChar(false); 747 Name newname = reader.name(); 748 TokenKind tk1 = tokens.lookupKind(newname); 749 if (tk1 == TokenKind.IDENTIFIER) { 750 reader.sp--; 751 break; 752 } 753 tk = tk1; 754 reader.scanChar(); 755 if (!isSpecial(reader.ch)) break; 756 } 757 } 758 759 /** Read token. 760 */ 761 public Token readToken() { 762 763 reader.sp = 0; 764 name = null; 765 radix = 0; 766 767 int pos = 0; 768 int endPos = 0; 769 List<Comment> comments = null; 770 771 try { 772 loop: while (true) { 773 pos = reader.bp; 774 switch (reader.ch) { 775 case ' ': // (Spec 3.6) 776 case '\t': // (Spec 3.6) 777 case FF: // (Spec 3.6) 778 do { 779 reader.scanChar(); 780 } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF); 781 processWhiteSpace(pos, reader.bp); 782 break; 783 case LF: // (Spec 3.4) 784 reader.scanChar(); 785 processLineTerminator(pos, reader.bp); 786 break; 787 case CR: // (Spec 3.4) 788 reader.scanChar(); 789 if (reader.ch == LF) { 790 reader.scanChar(); 791 } 792 processLineTerminator(pos, reader.bp); 793 break; 794 case 'A': case 'B': case 'C': case 'D': case 'E': 795 case 'F': case 'G': case 'H': case 'I': case 'J': 796 case 'K': case 'L': case 'M': case 'N': case 'O': 797 case 'P': case 'Q': case 'R': case 'S': case 'T': 798 case 'U': case 'V': case 'W': case 'X': 
case 'Y': 799 case 'Z': 800 case 'a': case 'b': case 'c': case 'd': case 'e': 801 case 'f': case 'g': case 'h': case 'i': case 'j': 802 case 'k': case 'l': case 'm': case 'n': case 'o': 803 case 'p': case 'q': case 'r': case 's': case 't': 804 case 'u': case 'v': case 'w': case 'x': case 'y': 805 case 'z': 806 case '$': case '_': 807 scanIdent(); 808 break loop; 809 case '0': 810 reader.scanChar(); 811 if (reader.ch == 'x' || reader.ch == 'X') { 812 reader.scanChar(); 813 skipIllegalUnderscores(); 814 scanNumber(pos, 16); 815 } else if (reader.ch == 'b' || reader.ch == 'B') { 816 reader.scanChar(); 817 skipIllegalUnderscores(); 818 scanNumber(pos, 2); 819 } else { 820 reader.putChar('0'); 821 if (reader.ch == '_') { 822 int savePos = reader.bp; 823 do { 824 reader.scanChar(); 825 } while (reader.ch == '_'); 826 if (reader.digit(pos, 10) < 0) { 827 lexError(savePos, Errors.IllegalUnderscore); 828 } 829 } 830 scanNumber(pos, 8); 831 } 832 break loop; 833 case '1': case '2': case '3': case '4': 834 case '5': case '6': case '7': case '8': case '9': 835 scanNumber(pos, 10); 836 break loop; 837 case '.': 838 reader.scanChar(); 839 if (reader.digit(pos, 10) >= 0) { 840 reader.putChar('.'); 841 scanFractionAndSuffix(pos); 842 } else if (reader.ch == '.') { 843 int savePos = reader.bp; 844 reader.putChar('.'); reader.putChar('.', true); 845 if (reader.ch == '.') { 846 reader.scanChar(); 847 reader.putChar('.'); 848 tk = TokenKind.ELLIPSIS; 849 } else { 850 lexError(savePos, Errors.IllegalDot); 851 } 852 } else { 853 tk = TokenKind.DOT; 854 } 855 break loop; 856 case ',': 857 reader.scanChar(); tk = TokenKind.COMMA; break loop; 858 case ';': 859 reader.scanChar(); tk = TokenKind.SEMI; break loop; 860 case '(': 861 reader.scanChar(); tk = TokenKind.LPAREN; break loop; 862 case ')': 863 reader.scanChar(); tk = TokenKind.RPAREN; break loop; 864 case '[': 865 reader.scanChar(); tk = TokenKind.LBRACKET; break loop; 866 case ']': 867 reader.scanChar(); tk = TokenKind.RBRACKET; 
break loop; 868 case '{': 869 reader.scanChar(); tk = TokenKind.LBRACE; break loop; 870 case '}': 871 reader.scanChar(); tk = TokenKind.RBRACE; break loop; 872 case '/': 873 reader.scanChar(); 874 if (reader.ch == '/') { 875 do { 876 reader.scanCommentChar(); 877 } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen); 878 if (reader.bp < reader.buflen) { 879 comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE)); 880 } 881 break; 882 } else if (reader.ch == '*') { 883 boolean isEmpty = false; 884 reader.scanChar(); 885 CommentStyle style; 886 if (reader.ch == '*') { 887 style = CommentStyle.JAVADOC; 888 reader.scanCommentChar(); 889 if (reader.ch == '/') { 890 isEmpty = true; 891 } 892 } else { 893 style = CommentStyle.BLOCK; 894 } 895 while (!isEmpty && reader.bp < reader.buflen) { 896 if (reader.ch == '*') { 897 reader.scanChar(); 898 if (reader.ch == '/') break; 899 } else { 900 reader.scanCommentChar(); 901 } 902 } 903 if (reader.ch == '/') { 904 reader.scanChar(); 905 comments = addComment(comments, processComment(pos, reader.bp, style)); 906 break; 907 } else { 908 lexError(pos, Errors.UnclosedComment); 909 break loop; 910 } 911 } else if (reader.ch == '=') { 912 tk = TokenKind.SLASHEQ; 913 reader.scanChar(); 914 } else { 915 tk = TokenKind.SLASH; 916 } 917 break loop; 918 case '\'': 919 reader.scanChar(); 920 if (reader.ch == '\'') { 921 lexError(pos, Errors.EmptyCharLit); 922 reader.scanChar(); 923 } else { 924 if (isEOLN()) 925 lexError(pos, Errors.IllegalLineEndInCharLit); 926 scanLitChar(pos, true, false); 927 if (reader.ch == '\'') { 928 reader.scanChar(); 929 tk = TokenKind.CHARLITERAL; 930 } else { 931 lexError(pos, Errors.UnclosedCharLit); 932 } 933 } 934 break loop; 935 case '\"': 936 scanString(pos); 937 break loop; 938 default: 939 if (isSpecial(reader.ch)) { 940 scanOperator(); 941 } else { 942 boolean isJavaIdentifierStart; 943 int codePoint = -1; 944 if (reader.ch < '\u0080') { 945 // all ASCII 
range chars already handled, above 946 isJavaIdentifierStart = false; 947 } else { 948 codePoint = reader.peekSurrogates(); 949 if (codePoint >= 0) { 950 if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) { 951 reader.putChar(true); 952 } 953 } else { 954 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch); 955 } 956 } 957 if (isJavaIdentifierStart) { 958 scanIdent(); 959 } else if (reader.digit(pos, 10) >= 0) { 960 scanNumber(pos, 10); 961 } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5 962 tk = TokenKind.EOF; 963 pos = reader.realLength; 964 } else { 965 String arg; 966 967 if (codePoint >= 0) { 968 char high = reader.ch; 969 reader.scanChar(); 970 arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch); 971 } else { 972 arg = (32 < reader.ch && reader.ch < 127) ? 973 String.format("%s", reader.ch) : 974 String.format("\\u%04x", (int)reader.ch); 975 } 976 lexError(pos, Errors.IllegalChar(arg)); 977 reader.scanChar(); 978 } 979 } 980 break loop; 981 } 982 } 983 endPos = reader.bp; 984 switch (tk.tag) { 985 case DEFAULT: return new Token(tk, pos, endPos, comments); 986 case NAMED: return new NamedToken(tk, pos, endPos, name, comments); 987 case STRING: { 988 // Get characters from string buffer. 989 String string = reader.chars(); 990 // If a text block. 991 if (isTextBlock && TextBlockSupport.hasSupport()) { 992 // Verify that the incidental indentation is consistent. 
993 if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) { 994 Set<TextBlockSupport.WhitespaceChecks> checks = 995 TextBlockSupport.checkWhitespace(string); 996 if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) { 997 lexWarning(LintCategory.TEXT_BLOCKS, pos, 998 Warnings.InconsistentWhiteSpaceIndentation); 999 } 1000 if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) { 1001 lexWarning(LintCategory.TEXT_BLOCKS, pos, 1002 Warnings.TrailingWhiteSpaceWillBeRemoved); 1003 } 1004 } 1005 // Remove incidental indentation. 1006 try { 1007 string = TextBlockSupport.stripIndent(string); 1008 } catch (Exception ex) { 1009 // Error already reported, just use unstripped string. 1010 } 1011 } 1012 // Translate escape sequences if present. 1013 if (hasEscapeSequences && TextBlockSupport.hasSupport()) { 1014 try { 1015 string = TextBlockSupport.translateEscapes(string); 1016 } catch (Exception ex) { 1017 // Error already reported, just use untranslated string. 1018 } 1019 } 1020 // Build string token. 1021 return new StringToken(tk, pos, endPos, string, comments); 1022 } 1023 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments); 1024 default: throw new AssertionError(); 1025 } 1026 } 1027 finally { 1028 if (scannerDebug) { 1029 System.out.println("nextToken(" + pos 1030 + "," + endPos + ")=|" + 1031 new String(reader.getRawCharacters(pos, endPos)) 1032 + "|"); 1033 } 1034 } 1035 } 1036 //where 1037 List<Comment> addComment(List<Comment> comments, Comment comment) { 1038 return comments == null ? 1039 List.of(comment) : 1040 comments.prepend(comment); 1041 } 1042 1043 /** Return the position where a lexical error occurred; 1044 */ 1045 public int errPos() { 1046 return errPos; 1047 } 1048 1049 /** Set the position where a lexical error occurred; 1050 */ 1051 public void errPos(int pos) { 1052 errPos = pos; 1053 } 1054 1055 /** 1056 * Called when a complete comment has been scanned. 
pos and endPos 1057 * will mark the comment boundary. 1058 */ 1059 protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) { 1060 if (scannerDebug) 1061 System.out.println("processComment(" + pos 1062 + "," + endPos + "," + style + ")=|" 1063 + new String(reader.getRawCharacters(pos, endPos)) 1064 + "|"); 1065 char[] buf = reader.getRawCharacters(pos, endPos); 1066 return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style); 1067 } 1068 1069 /** 1070 * Called when a complete whitespace run has been scanned. pos and endPos 1071 * will mark the whitespace boundary. 1072 */ 1073 protected void processWhiteSpace(int pos, int endPos) { 1074 if (scannerDebug) 1075 System.out.println("processWhitespace(" + pos 1076 + "," + endPos + ")=|" + 1077 new String(reader.getRawCharacters(pos, endPos)) 1078 + "|"); 1079 } 1080 1081 /** 1082 * Called when a line terminator has been processed. 1083 */ 1084 protected void processLineTerminator(int pos, int endPos) { 1085 if (scannerDebug) 1086 System.out.println("processTerminator(" + pos 1087 + "," + endPos + ")=|" + 1088 new String(reader.getRawCharacters(pos, endPos)) 1089 + "|"); 1090 } 1091 1092 /** Build a map for translating between line numbers and 1093 * positions in the input. 1094 * 1095 * @return a LineMap */ 1096 public Position.LineMap getLineMap() { 1097 return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false); 1098 } 1099 1100 1101 /** 1102 * Scan a documentation comment; determine if a deprecated tag is present. 1103 * Called once the initial /, * have been skipped, positioned at the second * 1104 * (which is treated as the beginning of the first line). 1105 * Stops positioned at the closing '/'. 
*/
    protected static class BasicComment<U extends UnicodeReader> implements Comment {

        // Style of this comment, as supplied to the constructor.
        CommentStyle cs;
        // Reader that scanDocComment() walks to inspect the comment text.
        U comment_reader;

        // Set once an "@deprecated" tag has been recognised by scanDocComment().
        protected boolean deprecatedFlag = false;
        // Set when scanDocComment() has finished (normally or not), so it runs at most once
        // from isDeprecated().
        protected boolean scanned = false;

        protected BasicComment(U comment_reader, CommentStyle cs) {
            this.comment_reader = comment_reader;
            this.cs = cs;
        }

        /** This basic implementation carries no text; always returns null. */
        public String getText() {
            return null;
        }

        /** This basic implementation has no position mapping; always returns -1. */
        public int getSourcePos(int pos) {
            return -1;
        }

        public CommentStyle getStyle() {
            return cs;
        }

        /**
         * Report whether the comment was found to carry an "@deprecated" tag.
         * A JAVADOC-style comment is scanned on first use; other styles are
         * never scanned.
         */
        public boolean isDeprecated() {
            boolean needsScan = cs == CommentStyle.JAVADOC && !scanned;
            if (needsScan) {
                scanDocComment();
            }
            return deprecatedFlag;
        }

        /**
         * Walk the comment line by line looking for "@deprecated" at the start
         * of a line (after optional whitespace, stars, and more whitespace).
         * The tag counts when it is followed by whitespace or by the closing
         * star-slash. Scanning stops at the first star-slash encountered or at
         * the end of the reader's input; scanned is set in every case.
         */
        protected void scanDocComment() {
            final U in = comment_reader;
            try {
                // Step over the opening '/**'.
                in.bp += 3;
                in.ch = in.buf[in.bp];

                forEachLine:
                while (in.bp < in.buflen) {

                    // Leading whitespace, then any run of stars, then whitespace again.
                    skipBlanks();
                    while (in.bp < in.buflen && in.ch == '*') {
                        in.scanCommentChar();
                        if (in.ch == '/') {
                            return;
                        }
                    }
                    skipBlanks();

                    // Now at the beginning of the line in the JavaDoc sense:
                    // try to match the tag, unless it was already found.
                    boolean sawTag = false;
                    if (!deprecatedFlag) {
                        final String tag = "@deprecated";
                        int matched = 0;
                        while (in.bp < in.buflen && in.ch == tag.charAt(matched)) {
                            in.scanCommentChar();
                            matched++;
                            if (matched == tag.length()) {
                                sawTag = true;
                                break;
                            }
                        }
                    }

                    // The tag only counts when properly terminated.
                    if (sawTag && in.bp < in.buflen) {
                        if (Character.isWhitespace(in.ch)) {
                            deprecatedFlag = true;
                        } else if (in.ch == '*') {
                            in.scanCommentChar();
                            if (in.ch == '/') {
                                deprecatedFlag = true;
                                return;
                            }
                        }
                    }

                    // Consume the remainder of the line.
                    while (in.bp < in.buflen) {
                        final char current = in.ch;
                        in.scanCommentChar();
                        if (current == '*') {
                            if (in.ch == '/') {
                                return;
                            }
                        } else if (current == CR) { // (Spec 3.4)
                            // Treat CR LF as a single terminator.
                            if (in.ch == LF) {
                                in.scanCommentChar();
                            }
                            continue forEachLine;
                        } else if (current == LF) { // (Spec 3.4)
                            continue forEachLine;
                        }
                    } // rest of line
                } // forEachLine
            } finally {
                scanned = true;
            }
        }

        /** Advance the comment reader past spaces, tabs and form feeds. */
        private void skipBlanks() {
            final U in = comment_reader;
            while (in.bp < in.buflen && (in.ch == ' ' || in.ch == '\t' || in.ch == FF)) {
                in.scanCommentChar();
            }
        }
    }
}