1 /* 2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package com.sun.tools.javac.parser; 27 28 import com.sun.tools.javac.code.Lint; 29 import com.sun.tools.javac.code.Lint.LintCategory; 30 import com.sun.tools.javac.code.Preview; 31 import com.sun.tools.javac.code.Source; 32 import com.sun.tools.javac.code.Source.Feature; 33 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; 34 import com.sun.tools.javac.resources.CompilerProperties.Errors; 35 import com.sun.tools.javac.resources.CompilerProperties.Warnings; 36 import com.sun.tools.javac.util.*; 37 import com.sun.tools.javac.util.JCDiagnostic.*; 38 39 import java.lang.reflect.InvocationTargetException; 40 import java.lang.reflect.Method; 41 import java.nio.CharBuffer; 42 import java.util.HashSet; 43 import java.util.Set; 44 45 import static com.sun.tools.javac.parser.Tokens.*; 46 import static com.sun.tools.javac.util.LayoutCharacters.*; 47 48 /** The lexical analyzer maps an input stream consisting of 49 * ASCII characters and Unicode escapes into a token sequence. 50 * 51 * <p><b>This is NOT part of any supported API. 52 * If you write code that depends on this, you do so at your own risk. 53 * This code and its internal interfaces are subject to change or 54 * deletion without notice.</b> 55 */ 56 public class JavaTokenizer { 57 58 private static final boolean scannerDebug = false; 59 60 /** The source language setting. 61 */ 62 private Source source; 63 64 /** The preview language setting. */ 65 private Preview preview; 66 67 /** The log to be used for error reporting. 68 */ 69 private final Log log; 70 71 /** The token factory. */ 72 private final Tokens tokens; 73 74 /** The token kind, set by nextToken(). 75 */ 76 protected TokenKind tk; 77 78 /** The token's radix, set by nextToken(). 79 */ 80 protected int radix; 81 82 /** The token's name, set by nextToken(). 
*/
    protected Name name;

    /** The position where a lexical error occurred
     *  ({@code Position.NOPOS} until an error is recorded).
     */
    protected int errPos = Position.NOPOS;

    /** The Unicode reader (low-level stream reader).
     */
    protected UnicodeReader reader;

    /** Should the string be stripped of indentation?
     */
    protected boolean shouldStripIndent;

    /** Should the string's escapes be translated?
     */
    protected boolean shouldTranslateEscapes;

    /** Does the string have broken escapes?
     */
    protected boolean hasBrokenEscapes;

    /** The factory which created this tokenizer.
     */
    protected ScannerFactory fac;

    // The set of lint options currently in effect. It is initialized
    // from the context, and then is set/reset as needed by Attr as it
    // visits all the various parts of the trees during attribution.
    protected Lint lint;

    /** True if {@code Float.valueOf} of the running JDK accepts hexadecimal
     *  floating point literals; computed once by {@link #hexFloatsWork()}.
     */
    private static final boolean hexFloatsWork = hexFloatsWork();

    /** Probe whether {@code Float.valueOf} accepts the hexadecimal floating
     *  point literal {@code "0x1.0p1"}.
     *
     *  @return true if the literal parses, false if it is rejected with a
     *          {@code NumberFormatException}
     */
    private static boolean hexFloatsWork() {
        try {
            Float.valueOf("0x1.0p1");
            return true;
        } catch (NumberFormatException ex) {
            return false;
        }
    }

    /**
     * Create a scanner from the input array.  This method might
     * modify the array.  To avoid copying the input array, ensure
     * that {@code inputLength < input.length} or
     * {@code input[input.length -1]} is a white space character.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     * Must be positive and less than or equal to input.length.
132 */ 133 protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) { 134 this(fac, new UnicodeReader(fac, buf)); 135 } 136 137 protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) { 138 this(fac, new UnicodeReader(fac, buf, inputLength)); 139 } 140 141 protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) { 142 this.fac = fac; 143 this.log = fac.log; 144 this.tokens = fac.tokens; 145 this.source = fac.source; 146 this.preview = fac.preview; 147 this.reader = reader; 148 this.lint = fac.lint; 149 } 150 151 protected void checkSourceLevel(int pos, Feature feature) { 152 if (preview.isPreview(feature) && !preview.isEnabled()) { 153 //preview feature without --preview flag, error 154 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature)); 155 } else if (!feature.allowedInSource(source)) { 156 //incompatible source level, error 157 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name)); 158 } else if (preview.isPreview(feature)) { 159 //use of preview feature, warn 160 preview.warnPreview(pos, feature); 161 } 162 } 163 164 /** Report an error at the given position using the provided arguments. 165 */ 166 protected void lexError(int pos, JCDiagnostic.Error key) { 167 log.error(pos, key); 168 tk = TokenKind.ERROR; 169 errPos = pos; 170 } 171 172 protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) { 173 log.error(flags, pos, key); 174 tk = TokenKind.ERROR; 175 errPos = pos; 176 } 177 178 protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) { 179 DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ; 180 log.warning(lc, dp, key); 181 } 182 183 /** Read next character in character or string literal and copy into sbuf. 
184 */ 185 private void scanLitChar(int pos) { 186 if (reader.ch == '\\') { 187 if (reader.peekChar() == '\\' && !reader.isUnicode()) { 188 reader.skipChar(); 189 reader.putChar('\\', true); 190 } else { 191 reader.scanChar(); 192 switch (reader.ch) { 193 case '0': case '1': case '2': case '3': 194 case '4': case '5': case '6': case '7': 195 char leadch = reader.ch; 196 int oct = reader.digit(pos, 8); 197 reader.scanChar(); 198 if ('0' <= reader.ch && reader.ch <= '7') { 199 oct = oct * 8 + reader.digit(pos, 8); 200 reader.scanChar(); 201 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { 202 oct = oct * 8 + reader.digit(pos, 8); 203 reader.scanChar(); 204 } 205 } 206 reader.putChar((char)oct); 207 break; 208 case 'b': 209 reader.putChar('\b', true); break; 210 case 't': 211 reader.putChar('\t', true); break; 212 case 'n': 213 reader.putChar('\n', true); break; 214 case 'f': 215 reader.putChar('\f', true); break; 216 case 'r': 217 reader.putChar('\r', true); break; 218 case '\'': 219 reader.putChar('\'', true); break; 220 case '\"': 221 reader.putChar('\"', true); break; 222 case '\\': 223 reader.putChar('\\', true); break; 224 default: 225 lexError(reader.bp, Errors.IllegalEscChar); 226 } 227 } 228 } else if (reader.bp != reader.buflen) { 229 reader.putChar(true); 230 } 231 } 232 233 /** Read next character in character or string literal and copy into sbuf 234 * without translating escapes. Used by text blocks to preflight verify 235 * escapes sequences. 
236 */ 237 private void scanLitCharRaw(int pos) { 238 if (reader.ch == '\\') { 239 if (reader.peekChar() == '\\' && !reader.isUnicode()) { 240 reader.skipChar(); 241 reader.putChar('\\', false); 242 reader.putChar('\\', true); 243 } else { 244 reader.putChar('\\', true); 245 switch (reader.ch) { 246 case '0': case '1': case '2': case '3': 247 case '4': case '5': case '6': case '7': 248 char leadch = reader.ch; 249 reader.putChar(true); 250 if ('0' <= reader.ch && reader.ch <= '7') { 251 reader.putChar(true); 252 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { 253 reader.putChar(true); 254 } 255 } 256 break; 257 // Effectively list of valid escape sequences. 258 case 'b': 259 case 't': 260 case 'n': 261 case 'f': 262 case 'r': 263 case '\'': 264 case '\"': 265 case '\\': 266 reader.putChar(true); break; 267 default: 268 hasBrokenEscapes = true; 269 lexError(reader.bp, Errors.IllegalEscChar); 270 } 271 } 272 } else if (reader.bp != reader.buflen) { 273 reader.putChar(true); 274 } 275 } 276 277 /** Interim access to String methods used to support text blocks. 278 * Required to handle bootstrapping with pre-text block jdks. 279 * Could be reworked in the 'next' jdk. 280 */ 281 static class TextBlockSupport { 282 /** Reflection method to remove incidental indentation. 283 */ 284 private static final Method stripIndent; 285 286 /** Reflection method to translate escape sequences. 287 */ 288 private static final Method translateEscapes; 289 290 /** true if stripIndent and translateEscapes are available in the bootstrap jdk. 291 */ 292 private static final boolean hasSupport; 293 294 /** Get a string method via refection or null if not available. 295 */ 296 private static Method getStringMethodOrNull(String name) { 297 try { 298 return String.class.getMethod(name); 299 } catch (Exception ex) { 300 // Method not available, return null. 301 } 302 return null; 303 } 304 305 static { 306 // Get text block string methods. 
307 stripIndent = getStringMethodOrNull("stripIndent"); 308 translateEscapes = getStringMethodOrNull("translateEscapes"); 309 // true if stripIndent and translateEscapes are available in the bootstrap jdk. 310 hasSupport = stripIndent != null && translateEscapes != null; 311 } 312 313 /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk. 314 */ 315 static boolean hasSupport() { 316 return hasSupport; 317 } 318 319 /** Return the leading whitespace count (indentation) of the line. 320 */ 321 private static int indent(String line) { 322 return line.length() - line.stripLeading().length(); 323 } 324 325 enum WhitespaceChecks { 326 INCONSISTENT, 327 TRAILING 328 }; 329 330 /** Check that the use of white space in content is not problematic. 331 */ 332 static Set<WhitespaceChecks> checkWhitespace(String string) { 333 // Start with empty result set. 334 Set<WhitespaceChecks> checks = new HashSet<>(); 335 // No need to check empty strings. 336 if (string.isEmpty()) { 337 return checks; 338 } 339 // Maximum common indentation. 340 int outdent = 0; 341 // No need to check indentation if opting out (last line is empty.) 342 char lastChar = string.charAt(string.length() - 1); 343 boolean optOut = lastChar == '\n' || lastChar == '\r'; 344 // Split string based at line terminators. 345 String[] lines = string.split("\\R"); 346 int length = lines.length; 347 // Extract last line. 348 String lastLine = lines[length - 1]; 349 if (!optOut) { 350 // Prime with the last line indentation (may be blank.) 351 outdent = indent(lastLine); 352 for (String line : lines) { 353 // Blanks lines have no influence (last line accounted for.) 354 if (!line.isBlank()) { 355 outdent = Integer.min(outdent, indent(line)); 356 if (outdent == 0) { 357 break; 358 } 359 } 360 } 361 } 362 // Last line is representative. 363 String start = lastLine.substring(0, outdent); 364 for (String line : lines) { 365 // Fail if a line does not have the same indentation. 
366 if (!line.isBlank() && !line.startsWith(start)) { 367 // Mix of different white space 368 checks.add(WhitespaceChecks.INCONSISTENT); 369 } 370 // Line has content even after indent is removed. 371 if (outdent < line.length()) { 372 // Is the last character a white space. 373 lastChar = line.charAt(line.length() - 1); 374 if (Character.isWhitespace(lastChar)) { 375 // Has trailing white space. 376 checks.add(WhitespaceChecks.TRAILING); 377 } 378 } 379 } 380 return checks; 381 } 382 383 /** Invoke String::stripIndent through reflection. 384 */ 385 static String stripIndent(String string) { 386 try { 387 string = (String)stripIndent.invoke(string); 388 } catch (InvocationTargetException | IllegalAccessException ex) { 389 throw new RuntimeException(ex); 390 } 391 return string; 392 } 393 394 /** Invoke String::translateEscapes through reflection. 395 */ 396 static String translateEscapes(String string) { 397 try { 398 string = (String)translateEscapes.invoke(string); 399 } catch (InvocationTargetException | IllegalAccessException ex) { 400 throw new RuntimeException(ex); 401 } 402 return string; 403 } 404 } 405 406 /** Test for EOLN. 407 */ 408 private boolean isEOLN() { 409 return reader.ch == LF || reader.ch == CR; 410 } 411 412 /** Test for CRLF. 413 */ 414 private boolean isCRLF() { 415 return reader.ch == CR && reader.peekChar() == LF; 416 } 417 418 /** Count and skip repeated occurances of the specified character. 419 */ 420 private int countChar(char ch, int max) { 421 int count = 0; 422 for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) { 423 reader.scanChar(); 424 } 425 return count; 426 } 427 428 /** Scan a string literal or text block. 429 */ 430 private void scanString(int pos) { 431 // Clear flags. 432 shouldStripIndent = false; 433 shouldTranslateEscapes = false; 434 hasBrokenEscapes = false; 435 // Check if text block string methods are present. 
436 boolean hasTextBlockSupport = TextBlockSupport.hasSupport(); 437 // Track the end of first line for error recovery. 438 int firstEOLN = -1; 439 // Attempt to scan for up to 3 double quotes. 440 int openCount = countChar('\"', 3); 441 switch (openCount) { 442 case 1: // Starting a string literal. 443 break; 444 case 2: // Starting an empty string literal. 445 // Start again but only consume one quote. 446 reader.reset(pos); 447 openCount = countChar('\"', 1); 448 break; 449 case 3: // Starting a text block. 450 // Check if preview feature is enabled for text blocks. 451 checkSourceLevel(pos, Feature.TEXT_BLOCKS); 452 // Only proceed if text block string methods are present. 453 if (hasTextBlockSupport) { 454 // Indicate that the final string should have incidental indentation removed. 455 shouldStripIndent = true; 456 // Verify the open delimiter sequence. 457 boolean hasOpenEOLN = false; 458 while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) { 459 hasOpenEOLN = isEOLN(); 460 if (hasOpenEOLN) { 461 break; 462 } 463 reader.scanChar(); 464 } 465 // Error if the open delimiter sequence not is """<Whitespace>*<LineTerminator>. 466 if (!hasOpenEOLN) { 467 lexError(reader.bp, Errors.IllegalTextBlockOpen); 468 return; 469 } 470 // Skip line terminator. 471 int start = reader.bp; 472 if (isCRLF()) { 473 reader.scanChar(); 474 } 475 reader.scanChar(); 476 processLineTerminator(start, reader.bp); 477 } else { 478 // No text block string methods are present, so reset and treat like string literal. 479 reader.reset(pos); 480 openCount = countChar('\"', 1); 481 } 482 break; 483 } 484 // While characters are available. 485 while (reader.bp < reader.buflen) { 486 // If possible close delimiter sequence. 487 if (reader.ch == '\"') { 488 // Check to see if enough double quotes are present. 489 int closeCount = countChar('\"', openCount); 490 if (openCount == closeCount) { 491 // Good result. 
492 tk = Tokens.TokenKind.STRINGLITERAL; 493 return; 494 } 495 // False alarm, add double quotes to string buffer. 496 reader.repeat('\"', closeCount); 497 } else if (isEOLN()) { 498 // Line terminator in string literal is an error. 499 // Fall out to unclosed string literal error. 500 if (openCount == 1) { 501 break; 502 } 503 // Add line terminator to string buffer. 504 int start = reader.bp; 505 if (isCRLF()) { 506 reader.scanChar(); 507 } 508 reader.putChar('\n', true); 509 processLineTerminator(start, reader.bp); 510 // Record first line terminator for error recovery. 511 if (firstEOLN == -1) { 512 firstEOLN = reader.bp; 513 } 514 } else if (reader.ch == '\\') { 515 // Handle escape sequences. 516 if (hasTextBlockSupport) { 517 // Indicate that the final string should have escapes translated. 518 shouldTranslateEscapes = true; 519 // Validate escape sequence and add to string buffer. 520 scanLitCharRaw(pos); 521 } else { 522 // Translate escape sequence and add result to string buffer. 523 scanLitChar(pos); 524 } 525 } else { 526 // Add character to string buffer. 527 reader.putChar(true); 528 } 529 } 530 // String ended without close delimiter sequence. 531 lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock); 532 if (firstEOLN != -1) { 533 // Reset recovery position to point after open delimiter sequence. 534 reader.reset(firstEOLN); 535 } 536 } 537 538 private void scanDigits(int pos, int digitRadix) { 539 char saveCh; 540 int savePos; 541 do { 542 if (reader.ch != '_') { 543 reader.putChar(false); 544 } 545 saveCh = reader.ch; 546 savePos = reader.bp; 547 reader.scanChar(); 548 } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_'); 549 if (saveCh == '_') 550 lexError(savePos, Errors.IllegalUnderscore); 551 } 552 553 /** Read fractional part of hexadecimal floating point number. 
554 */ 555 private void scanHexExponentAndSuffix(int pos) { 556 if (reader.ch == 'p' || reader.ch == 'P') { 557 reader.putChar(true); 558 skipIllegalUnderscores(); 559 if (reader.ch == '+' || reader.ch == '-') { 560 reader.putChar(true); 561 } 562 skipIllegalUnderscores(); 563 if (reader.digit(pos, 10) >= 0) { 564 scanDigits(pos, 10); 565 if (!hexFloatsWork) 566 lexError(pos, Errors.UnsupportedCrossFpLit); 567 } else 568 lexError(pos, Errors.MalformedFpLit); 569 } else { 570 lexError(pos, Errors.MalformedFpLit); 571 } 572 if (reader.ch == 'f' || reader.ch == 'F') { 573 reader.putChar(true); 574 tk = TokenKind.FLOATLITERAL; 575 radix = 16; 576 } else { 577 if (reader.ch == 'd' || reader.ch == 'D') { 578 reader.putChar(true); 579 } 580 tk = TokenKind.DOUBLELITERAL; 581 radix = 16; 582 } 583 } 584 585 /** Read fractional part of floating point number. 586 */ 587 private void scanFraction(int pos) { 588 skipIllegalUnderscores(); 589 if (reader.digit(pos, 10) >= 0) { 590 scanDigits(pos, 10); 591 } 592 int sp1 = reader.sp; 593 if (reader.ch == 'e' || reader.ch == 'E') { 594 reader.putChar(true); 595 skipIllegalUnderscores(); 596 if (reader.ch == '+' || reader.ch == '-') { 597 reader.putChar(true); 598 } 599 skipIllegalUnderscores(); 600 if (reader.digit(pos, 10) >= 0) { 601 scanDigits(pos, 10); 602 return; 603 } 604 lexError(pos, Errors.MalformedFpLit); 605 reader.sp = sp1; 606 } 607 } 608 609 /** Read fractional part and 'd' or 'f' suffix of floating point number. 610 */ 611 private void scanFractionAndSuffix(int pos) { 612 radix = 10; 613 scanFraction(pos); 614 if (reader.ch == 'f' || reader.ch == 'F') { 615 reader.putChar(true); 616 tk = TokenKind.FLOATLITERAL; 617 } else { 618 if (reader.ch == 'd' || reader.ch == 'D') { 619 reader.putChar(true); 620 } 621 tk = TokenKind.DOUBLELITERAL; 622 } 623 } 624 625 /** Read fractional part and 'd' or 'f' suffix of floating point number. 
626 */ 627 private void scanHexFractionAndSuffix(int pos, boolean seendigit) { 628 radix = 16; 629 Assert.check(reader.ch == '.'); 630 reader.putChar(true); 631 skipIllegalUnderscores(); 632 if (reader.digit(pos, 16) >= 0) { 633 seendigit = true; 634 scanDigits(pos, 16); 635 } 636 if (!seendigit) 637 lexError(pos, Errors.InvalidHexNumber); 638 else 639 scanHexExponentAndSuffix(pos); 640 } 641 642 private void skipIllegalUnderscores() { 643 if (reader.ch == '_') { 644 lexError(reader.bp, Errors.IllegalUnderscore); 645 while (reader.ch == '_') 646 reader.scanChar(); 647 } 648 } 649 650 /** Read a number. 651 * @param radix The radix of the number; one of 2, 8, 10, 16. 652 */ 653 private void scanNumber(int pos, int radix) { 654 // for octal, allow base-10 digit in case it's a float literal 655 this.radix = radix; 656 int digitRadix = (radix == 8 ? 10 : radix); 657 int firstDigit = reader.digit(pos, Math.max(10, digitRadix)); 658 boolean seendigit = firstDigit >= 0; 659 boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix; 660 if (seendigit) { 661 scanDigits(pos, digitRadix); 662 } 663 if (radix == 16 && reader.ch == '.') { 664 scanHexFractionAndSuffix(pos, seendigit); 665 } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) { 666 scanHexExponentAndSuffix(pos); 667 } else if (digitRadix == 10 && reader.ch == '.') { 668 reader.putChar(true); 669 scanFractionAndSuffix(pos); 670 } else if (digitRadix == 10 && 671 (reader.ch == 'e' || reader.ch == 'E' || 672 reader.ch == 'f' || reader.ch == 'F' || 673 reader.ch == 'd' || reader.ch == 'D')) { 674 scanFractionAndSuffix(pos); 675 } else { 676 if (!seenValidDigit) { 677 switch (radix) { 678 case 2: 679 lexError(pos, Errors.InvalidBinaryNumber); 680 break; 681 case 16: 682 lexError(pos, Errors.InvalidHexNumber); 683 break; 684 } 685 } 686 if (reader.ch == 'l' || reader.ch == 'L') { 687 reader.scanChar(); 688 tk = TokenKind.LONGLITERAL; 689 } else { 690 tk = TokenKind.INTLITERAL; 691 } 
692 } 693 } 694 695 /** Read an identifier. 696 */ 697 private void scanIdent() { 698 boolean isJavaIdentifierPart; 699 char high; 700 reader.putChar(true); 701 do { 702 switch (reader.ch) { 703 case 'A': case 'B': case 'C': case 'D': case 'E': 704 case 'F': case 'G': case 'H': case 'I': case 'J': 705 case 'K': case 'L': case 'M': case 'N': case 'O': 706 case 'P': case 'Q': case 'R': case 'S': case 'T': 707 case 'U': case 'V': case 'W': case 'X': case 'Y': 708 case 'Z': 709 case 'a': case 'b': case 'c': case 'd': case 'e': 710 case 'f': case 'g': case 'h': case 'i': case 'j': 711 case 'k': case 'l': case 'm': case 'n': case 'o': 712 case 'p': case 'q': case 'r': case 's': case 't': 713 case 'u': case 'v': case 'w': case 'x': case 'y': 714 case 'z': 715 case '$': case '_': 716 case '0': case '1': case '2': case '3': case '4': 717 case '5': case '6': case '7': case '8': case '9': 718 break; 719 case '\u0000': case '\u0001': case '\u0002': case '\u0003': 720 case '\u0004': case '\u0005': case '\u0006': case '\u0007': 721 case '\u0008': case '\u000E': case '\u000F': case '\u0010': 722 case '\u0011': case '\u0012': case '\u0013': case '\u0014': 723 case '\u0015': case '\u0016': case '\u0017': 724 case '\u0018': case '\u0019': case '\u001B': 725 case '\u007F': 726 reader.scanChar(); 727 continue; 728 case '\u001A': // EOI is also a legal identifier part 729 if (reader.bp >= reader.buflen) { 730 name = reader.name(); 731 tk = tokens.lookupKind(name); 732 return; 733 } 734 reader.scanChar(); 735 continue; 736 default: 737 if (reader.ch < '\u0080') { 738 // all ASCII range chars already handled, above 739 isJavaIdentifierPart = false; 740 } else { 741 if (Character.isIdentifierIgnorable(reader.ch)) { 742 reader.scanChar(); 743 continue; 744 } else { 745 int codePoint = reader.peekSurrogates(); 746 if (codePoint >= 0) { 747 if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) { 748 reader.putChar(true); 749 } 750 } else { 751 isJavaIdentifierPart = 
Character.isJavaIdentifierPart(reader.ch); 752 } 753 } 754 } 755 if (!isJavaIdentifierPart) { 756 name = reader.name(); 757 tk = tokens.lookupKind(name); 758 return; 759 } 760 } 761 reader.putChar(true); 762 } while (true); 763 } 764 765 /** Return true if reader.ch can be part of an operator. 766 */ 767 private boolean isSpecial(char ch) { 768 switch (ch) { 769 case '!': case '%': case '&': case '*': case '?': 770 case '+': case '-': case ':': case '<': case '=': 771 case '>': case '^': case '|': case '~': 772 case '@': 773 return true; 774 default: 775 return false; 776 } 777 } 778 779 /** Read longest possible sequence of special characters and convert 780 * to token. 781 */ 782 private void scanOperator() { 783 while (true) { 784 reader.putChar(false); 785 Name newname = reader.name(); 786 TokenKind tk1 = tokens.lookupKind(newname); 787 if (tk1 == TokenKind.IDENTIFIER) { 788 reader.sp--; 789 break; 790 } 791 tk = tk1; 792 reader.scanChar(); 793 if (!isSpecial(reader.ch)) break; 794 } 795 } 796 797 /** Read token. 
*/
    public Token readToken() {

        // Reset the per-token state.
        reader.sp = 0;
        name = null;
        radix = 0;

        int pos = 0;
        int endPos = 0;
        // Comments seen before the token; attached to the token that is returned.
        List<Comment> comments = null;

        try {
            // White space, line terminators and comments 'break' out of the switch
            // and go round again; everything else ends the scan with 'break loop'.
            loop: while (true) {
                pos = reader.bp;
                switch (reader.ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    do {
                        reader.scanChar();
                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                    processWhiteSpace(pos, reader.bp);
                    break;
                case LF: // (Spec 3.4)
                    reader.scanChar();
                    processLineTerminator(pos, reader.bp);
                    break;
                case CR: // (Spec 3.4)
                    reader.scanChar();
                    if (reader.ch == LF) {
                        reader.scanChar();
                    }
                    processLineTerminator(pos, reader.bp);
                    break;
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    scanIdent();
                    break loop;
                case '0':
                    // The prefix after the leading zero selects hex, binary or octal.
                    reader.scanChar();
                    if (reader.ch == 'x' || reader.ch == 'X') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 16);
                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 2);
                    } else {
                        reader.putChar('0');
                        if (reader.ch == '_') {
                            // Underscores after the leading zero must be followed by a digit.
                            int savePos = reader.bp;
                            do {
                                reader.scanChar();
                            } while (reader.ch == '_');
                            if (reader.digit(pos, 10) < 0) {
                                lexError(savePos, Errors.IllegalUnderscore);
                            }
                        }
                        scanNumber(pos, 8);
                    }
                    break loop;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(pos, 10);
                    break loop;
                case '.':
                    // Either a floating point literal, an ellipsis, or a plain dot.
                    reader.scanChar();
                    if (reader.digit(pos, 10) >= 0) {
                        reader.putChar('.');
                        scanFractionAndSuffix(pos);
                    } else if (reader.ch == '.') {
                        int savePos = reader.bp;
                        reader.putChar('.'); reader.putChar('.', true);
                        if (reader.ch == '.') {
                            reader.scanChar();
                            reader.putChar('.');
                            tk = TokenKind.ELLIPSIS;
                        } else {
                            // Exactly two dots is not a token.
                            lexError(savePos, Errors.IllegalDot);
                        }
                    } else {
                        tk = TokenKind.DOT;
                    }
                    break loop;
                case ',':
                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
                case ';':
                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
                case '(':
                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
                case ')':
                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
                case '[':
                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
                case ']':
                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
                case '{':
                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
                case '}':
                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
                case '/':
                    reader.scanChar();
                    if (reader.ch == '/') {
                        // Line comment: runs up to the next CR or LF, or the end of input.
                        do {
                            reader.scanCommentChar();
                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        // The comment is only recorded if it ended before the end of input.
                        if (reader.bp < reader.buflen) {
                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                        }
                        break;
                    } else if (reader.ch == '*') {
                        // Block or javadoc comment.
                        boolean isEmpty = false;
                        reader.scanChar();
                        CommentStyle style;
                        if (reader.ch == '*') {
                            style = CommentStyle.JAVADOC;
                            reader.scanCommentChar();
                            if (reader.ch == '/') {
                                // Slash, two stars, slash: a comment with no body.
                                isEmpty = true;
                            }
                        } else {
                            style = CommentStyle.BLOCK;
                        }
                        // Look for the closing star-slash.
                        while (!isEmpty && reader.bp < reader.buflen) {
                            if (reader.ch == '*') {
                                reader.scanChar();
                                if (reader.ch == '/') break;
                            } else {
                                reader.scanCommentChar();
                            }
                        }
                        if (reader.ch == '/') {
                            reader.scanChar();
                            comments = addComment(comments, processComment(pos, reader.bp, style));
                            break;
                        } else {
                            lexError(pos, Errors.UnclosedComment);
                            break loop;
                        }
                    } else if (reader.ch == '=') {
                        tk = TokenKind.SLASHEQ;
                        reader.scanChar();
                    } else {
                        tk = TokenKind.SLASH;
                    }
                    break loop;
                case '\'':
                    // Character literal.
                    reader.scanChar();
                    if (reader.ch == '\'') {
                        lexError(pos, Errors.EmptyCharLit);
                        reader.scanChar();
                    } else {
                        if (isEOLN())
                            lexError(pos, Errors.IllegalLineEndInCharLit);
                        scanLitChar(pos);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
                        } else {
                            lexError(pos, Errors.UnclosedCharLit);
                        }
                    }
                    break loop;
                case '\"':
                    scanString(pos);
                    break loop;
                default:
                    if (isSpecial(reader.ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        int codePoint = -1;
                        if (reader.ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            codePoint = reader.peekSurrogates();
                            if (codePoint >= 0) {
                                // Surrogate pair: copy the first half here; scanIdent()
                                // starts by copying the current character.
                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
                                    reader.putChar(true);
                                }
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (reader.digit(pos, 10) >= 0) {
                            scanNumber(pos, 10);
                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            tk = TokenKind.EOF;
                            pos = reader.realLength;
                        } else {
                            // Illegal character: build a printable form of it for the error message.
                            String arg;

                            if (codePoint >= 0) {
                                char high = reader.ch;
                                reader.scanChar();
                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
                            } else {
                                arg = (32 < reader.ch && reader.ch < 127) ?
                                                String.format("%s", reader.ch) :
                                                String.format("\\u%04x", (int)reader.ch);
                            }
                            lexError(pos, Errors.IllegalChar(arg));
                            reader.scanChar();
                        }
                    }
                    break loop;
                }
            }
            endPos = reader.bp;
            // Build the token object that matches the tag of the token kind.
            switch (tk.tag) {
                case DEFAULT: return new Token(tk, pos, endPos, comments);
                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
                case STRING: {
                    // Get characters from string buffer.
                    String string = reader.chars();
                    // If a text block.
                    if (shouldStripIndent) {
                        // Verify that the incidental indentation is consistent.
                        if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
                            Set<TextBlockSupport.WhitespaceChecks> checks =
                                    TextBlockSupport.checkWhitespace(string);
                            if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
                                lexWarning(LintCategory.TEXT_BLOCKS, pos,
                                        Warnings.InconsistentWhiteSpaceIndentation);
                            }
                            if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
                                lexWarning(LintCategory.TEXT_BLOCKS, pos,
                                        Warnings.TrailingWhiteSpaceWillBeRemoved);
                            }
                        }
                        // Remove incidental indentation.
                        string = TextBlockSupport.stripIndent(string);
                    }
                    // Translate escape sequences if present.
                    if (shouldTranslateEscapes && !hasBrokenEscapes) {
                        string = TextBlockSupport.translateEscapes(string);
                    }
                    // Build string token.
                    return new StringToken(tk, pos, endPos, string, comments);
                }
                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                default: throw new AssertionError();
            }
        }
        finally {
            if (scannerDebug) {
                    System.out.println("nextToken(" + pos
                                       + "," + endPos + ")=|" +
                                       new String(reader.getRawCharacters(pos, endPos))
                                       + "|");
            }
        }
    }
    //where
        /** Add a comment to the list of comments seen so far: a one-element list
         *  if there is none yet, otherwise the comment is prepended.
         */
        List<Comment> addComment(List<Comment> comments, Comment comment) {
            return comments == null ?
                    List.of(comment) :
                    comments.prepend(comment);
        }

    /** Return the position where a lexical error occurred.
     */
    public int errPos() {
        return errPos;
    }

    /** Set the position where a lexical error occurred.
     */
    public void errPos(int pos) {
        errPos = pos;
    }

    /**
     * Called when a complete comment has been scanned. pos and endPos
     * will mark the comment boundary.
     *
     * @return a BasicComment of the given style, backed by a new UnicodeReader
     *         over the raw characters between pos and endPos
     */
    protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
        if (scannerDebug)
            System.out.println("processComment(" + pos
                               + "," + endPos + "," + style + ")=|"
                               + new String(reader.getRawCharacters(pos, endPos))
                               + "|");
        char[] buf = reader.getRawCharacters(pos, endPos);
        return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
    }

    /**
     * Called when a complete whitespace run has been scanned. pos and endPos
     * will mark the whitespace boundary. Does nothing here unless scannerDebug
     * is set, in which case the run is printed.
     */
    protected void processWhiteSpace(int pos, int endPos) {
        if (scannerDebug)
            System.out.println("processWhitespace(" + pos
                               + "," + endPos + ")=|" +
                               new String(reader.getRawCharacters(pos, endPos))
                               + "|");
    }

    /**
     * Called when a line terminator has been processed. pos and endPos
     * will mark the terminator boundary. Does nothing here unless scannerDebug
     * is set, in which case the terminator is printed.
     */
    protected void processLineTerminator(int pos, int endPos) {
        if (scannerDebug)
            System.out.println("processTerminator(" + pos
                               + "," + endPos + ")=|" +
                               new String(reader.getRawCharacters(pos, endPos))
                               + "|");
    }

    /** Build a map for translating between line numbers and
     * positions in the input.
     *
     * @return a LineMap */
    public Position.LineMap getLineMap() {
        return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
    }


    /**
    * Scan a documentation comment; determine if a deprecated tag is present.
1133 * Called once the initial /, * have been skipped, positioned at the second * 1134 * (which is treated as the beginning of the first line). 1135 * Stops positioned at the closing '/'. 1136 */ 1137 protected static class BasicComment<U extends UnicodeReader> implements Comment { 1138 1139 CommentStyle cs; 1140 U comment_reader; 1141 1142 protected boolean deprecatedFlag = false; 1143 protected boolean scanned = false; 1144 1145 protected BasicComment(U comment_reader, CommentStyle cs) { 1146 this.comment_reader = comment_reader; 1147 this.cs = cs; 1148 } 1149 1150 public String getText() { 1151 return null; 1152 } 1153 1154 public int getSourcePos(int pos) { 1155 return -1; 1156 } 1157 1158 public CommentStyle getStyle() { 1159 return cs; 1160 } 1161 1162 public boolean isDeprecated() { 1163 if (!scanned && cs == CommentStyle.JAVADOC) { 1164 scanDocComment(); 1165 } 1166 return deprecatedFlag; 1167 } 1168 1169 @SuppressWarnings("fallthrough") 1170 protected void scanDocComment() { 1171 try { 1172 boolean deprecatedPrefix = false; 1173 1174 comment_reader.bp += 3; // '/**' 1175 comment_reader.ch = comment_reader.buf[comment_reader.bp]; 1176 1177 forEachLine: 1178 while (comment_reader.bp < comment_reader.buflen) { 1179 1180 // Skip optional WhiteSpace at beginning of line 1181 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) { 1182 comment_reader.scanCommentChar(); 1183 } 1184 1185 // Skip optional consecutive Stars 1186 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') { 1187 comment_reader.scanCommentChar(); 1188 if (comment_reader.ch == '/') { 1189 return; 1190 } 1191 } 1192 1193 // Skip optional WhiteSpace after Stars 1194 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) { 1195 comment_reader.scanCommentChar(); 1196 } 1197 1198 deprecatedPrefix = false; 1199 // 
At beginning of line in the JavaDoc sense. 1200 if (!deprecatedFlag) { 1201 String deprecated = "@deprecated"; 1202 int i = 0; 1203 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) { 1204 comment_reader.scanCommentChar(); 1205 i++; 1206 if (i == deprecated.length()) { 1207 deprecatedPrefix = true; 1208 break; 1209 } 1210 } 1211 } 1212 1213 if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) { 1214 if (Character.isWhitespace(comment_reader.ch)) { 1215 deprecatedFlag = true; 1216 } else if (comment_reader.ch == '*') { 1217 comment_reader.scanCommentChar(); 1218 if (comment_reader.ch == '/') { 1219 deprecatedFlag = true; 1220 return; 1221 } 1222 } 1223 } 1224 1225 // Skip rest of line 1226 while (comment_reader.bp < comment_reader.buflen) { 1227 switch (comment_reader.ch) { 1228 case '*': 1229 comment_reader.scanCommentChar(); 1230 if (comment_reader.ch == '/') { 1231 return; 1232 } 1233 break; 1234 case CR: // (Spec 3.4) 1235 comment_reader.scanCommentChar(); 1236 if (comment_reader.ch != LF) { 1237 continue forEachLine; 1238 } 1239 /* fall through to LF case */ 1240 case LF: // (Spec 3.4) 1241 comment_reader.scanCommentChar(); 1242 continue forEachLine; 1243 default: 1244 comment_reader.scanCommentChar(); 1245 } 1246 } // rest of line 1247 } // forEachLine 1248 return; 1249 } finally { 1250 scanned = true; 1251 } 1252 } 1253 } 1254 }