1 /* 2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.tools.javac.parser; 27 28 import com.sun.tools.javac.code.Preview; 29 import com.sun.tools.javac.code.Source; 30 import com.sun.tools.javac.code.Source.Feature; 31 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; 32 import com.sun.tools.javac.resources.CompilerProperties.Errors; 33 import com.sun.tools.javac.util.*; 34 import com.sun.tools.javac.util.JCDiagnostic.DiagnosticFlag; 35 36 import java.nio.CharBuffer; 37 38 import static com.sun.tools.javac.parser.Tokens.*; 39 import static com.sun.tools.javac.util.LayoutCharacters.*; 40 41 /** The lexical analyzer maps an input stream consisting of 42 * ASCII characters and Unicode escapes into a token sequence. 43 * 44 * <p><b>This is NOT part of any supported API. 45 * If you write code that depends on this, you do so at your own risk. 46 * This code and its internal interfaces are subject to change or 47 * deletion without notice.</b> 48 */ 49 public class JavaTokenizer { 50 51 private static final boolean scannerDebug = false; 52 53 /** The source language setting. 54 */ 55 private Source source; 56 57 /** The preview language setting. */ 58 private Preview preview; 59 60 /** The log to be used for error reporting. 61 */ 62 private final Log log; 63 64 /** The token factory. */ 65 private final Tokens tokens; 66 67 /** The token kind, set by nextToken(). 68 */ 69 protected TokenKind tk; 70 71 /** The token's radix, set by nextToken(). 72 */ 73 protected int radix; 74 75 /** The token's name, set by nextToken(). 76 */ 77 protected Name name; 78 79 /** The position where a lexical error occurred; 80 */ 81 protected int errPos = Position.NOPOS; 82 83 /** The Unicode reader (low-level stream reader). 84 */ 85 protected UnicodeReader reader; 86 87 protected ScannerFactory fac; 88 89 private static final boolean hexFloatsWork = hexFloatsWork(); 90 private static boolean hexFloatsWork() { 91 try { 92 Float.valueOf("0x1.0p1"); 93 return true; 94 } catch (NumberFormatException ex) { 95 return false; 96 } 97 } 98 99 /** 100 * Create a scanner from the input array. This method might 101 * modify the array. To avoid copying the input array, ensure 102 * that {@code inputLength < input.length} or 103 * {@code input[input.length -1]} is a white space character. 104 * 105 * @param fac the factory which created this Scanner 106 * @param buf the input, might be modified 107 * Must be positive and less than or equal to input.length. 108 */ 109 protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) { 110 this(fac, new UnicodeReader(fac, buf)); 111 } 112 113 protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) { 114 this(fac, new UnicodeReader(fac, buf, inputLength)); 115 } 116 117 protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) { 118 this.fac = fac; 119 this.log = fac.log; 120 this.tokens = fac.tokens; 121 this.source = fac.source; 122 this.preview = fac.preview; 123 this.reader = reader; 124 } 125 126 protected void checkSourceLevel(int pos, Feature feature) { 127 if (preview.isPreview(feature) && !preview.isEnabled()) { 128 //preview feature without --preview flag, error 129 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature)); 130 } else if (!feature.allowedInSource(source)) { 131 //incompatible source level, error 132 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name)); 133 } else if (preview.isPreview(feature)) { 134 //use of preview feature, warn 135 preview.warnPreview(pos, feature); 136 } 137 } 138 139 /** Report an error at the given position using the provided arguments. 140 */ 141 protected void lexError(int pos, JCDiagnostic.Error key) { 142 log.error(pos, key); 143 tk = TokenKind.ERROR; 144 errPos = pos; 145 } 146 147 protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) { 148 log.error(flags, pos, key); 149 tk = TokenKind.ERROR; 150 errPos = pos; 151 } 152 153 /** Read next character in character or string literal and copy into sbuf. 154 */ 155 private void scanLitChar(int pos) { 156 if (reader.ch == '\\') { 157 if (reader.peekChar() == '\\' && !reader.isUnicode()) { 158 reader.skipChar(); 159 reader.putChar('\\', true); 160 } else { 161 reader.scanChar(); 162 switch (reader.ch) { 163 case '0': case '1': case '2': case '3': 164 case '4': case '5': case '6': case '7': 165 char leadch = reader.ch; 166 int oct = reader.digit(pos, 8); 167 reader.scanChar(); 168 if ('0' <= reader.ch && reader.ch <= '7') { 169 oct = oct * 8 + reader.digit(pos, 8); 170 reader.scanChar(); 171 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { 172 oct = oct * 8 + reader.digit(pos, 8); 173 reader.scanChar(); 174 } 175 } 176 reader.putChar((char)oct); 177 break; 178 case 'b': 179 reader.putChar('\b', true); break; 180 case 't': 181 reader.putChar('\t', true); break; 182 case 'n': 183 reader.putChar('\n', true); break; 184 case 'f': 185 reader.putChar('\f', true); break; 186 case 'r': 187 reader.putChar('\r', true); break; 188 case '\'': 189 reader.putChar('\'', true); break; 190 case '\"': 191 reader.putChar('\"', true); break; 192 case '\\': 193 reader.putChar('\\', true); break; 194 default: 195 lexError(reader.bp, Errors.IllegalEscChar); 196 } 197 } 198 } else if (reader.bp != reader.buflen) { 199 reader.putChar(true); 200 } 201 } 202 203 private void scanDigits(int pos, int digitRadix) { 204 char saveCh; 205 int savePos; 206 do { 207 if (reader.ch != '_') { 208 reader.putChar(false); 209 } else { 210 checkSourceLevel(pos, Feature.UNDERSCORES_IN_LITERALS); 211 } 212 saveCh = reader.ch; 213 savePos = reader.bp; 214 reader.scanChar(); 215 } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_'); 216 if (saveCh == '_') 217 lexError(savePos, Errors.IllegalUnderscore); 218 } 219 220 /** Read fractional part of hexadecimal floating point number. 221 */ 222 private void scanHexExponentAndSuffix(int pos) { 223 if (reader.ch == 'p' || reader.ch == 'P') { 224 reader.putChar(true); 225 skipIllegalUnderscores(); 226 if (reader.ch == '+' || reader.ch == '-') { 227 reader.putChar(true); 228 } 229 skipIllegalUnderscores(); 230 if (reader.digit(pos, 10) >= 0) { 231 scanDigits(pos, 10); 232 if (!hexFloatsWork) 233 lexError(pos, Errors.UnsupportedCrossFpLit); 234 } else 235 lexError(pos, Errors.MalformedFpLit); 236 } else { 237 lexError(pos, Errors.MalformedFpLit); 238 } 239 if (reader.ch == 'f' || reader.ch == 'F') { 240 reader.putChar(true); 241 tk = TokenKind.FLOATLITERAL; 242 radix = 16; 243 } else { 244 if (reader.ch == 'd' || reader.ch == 'D') { 245 reader.putChar(true); 246 } 247 tk = TokenKind.DOUBLELITERAL; 248 radix = 16; 249 } 250 } 251 252 /** Read fractional part of floating point number. 253 */ 254 private void scanFraction(int pos) { 255 skipIllegalUnderscores(); 256 if (reader.digit(pos, 10) >= 0) { 257 scanDigits(pos, 10); 258 } 259 int sp1 = reader.sp; 260 if (reader.ch == 'e' || reader.ch == 'E') { 261 reader.putChar(true); 262 skipIllegalUnderscores(); 263 if (reader.ch == '+' || reader.ch == '-') { 264 reader.putChar(true); 265 } 266 skipIllegalUnderscores(); 267 if (reader.digit(pos, 10) >= 0) { 268 scanDigits(pos, 10); 269 return; 270 } 271 lexError(pos, Errors.MalformedFpLit); 272 reader.sp = sp1; 273 } 274 } 275 276 /** Read fractional part and 'd' or 'f' suffix of floating point number. 277 */ 278 private void scanFractionAndSuffix(int pos) { 279 radix = 10; 280 scanFraction(pos); 281 if (reader.ch == 'f' || reader.ch == 'F') { 282 reader.putChar(true); 283 tk = TokenKind.FLOATLITERAL; 284 } else { 285 if (reader.ch == 'd' || reader.ch == 'D') { 286 reader.putChar(true); 287 } 288 tk = TokenKind.DOUBLELITERAL; 289 } 290 } 291 292 /** Read fractional part and 'd' or 'f' suffix of floating point number. 293 */ 294 private void scanHexFractionAndSuffix(int pos, boolean seendigit) { 295 radix = 16; 296 Assert.check(reader.ch == '.'); 297 reader.putChar(true); 298 skipIllegalUnderscores(); 299 if (reader.digit(pos, 16) >= 0) { 300 seendigit = true; 301 scanDigits(pos, 16); 302 } 303 if (!seendigit) 304 lexError(pos, Errors.InvalidHexNumber); 305 else 306 scanHexExponentAndSuffix(pos); 307 } 308 309 private void skipIllegalUnderscores() { 310 if (reader.ch == '_') { 311 lexError(reader.bp, Errors.IllegalUnderscore); 312 while (reader.ch == '_') 313 reader.scanChar(); 314 } 315 } 316 317 /** Read a number. 318 * @param radix The radix of the number; one of 2, 8, 10, 16. 319 */ 320 private void scanNumber(int pos, int radix) { 321 // for octal, allow base-10 digit in case it's a float literal 322 this.radix = radix; 323 int digitRadix = (radix == 8 ? 10 : radix); 324 int firstDigit = reader.digit(pos, Math.max(10, digitRadix)); 325 boolean seendigit = firstDigit >= 0; 326 boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix; 327 if (seendigit) { 328 scanDigits(pos, digitRadix); 329 } 330 if (radix == 16 && reader.ch == '.') { 331 scanHexFractionAndSuffix(pos, seendigit); 332 } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) { 333 scanHexExponentAndSuffix(pos); 334 } else if (digitRadix == 10 && reader.ch == '.') { 335 reader.putChar(true); 336 scanFractionAndSuffix(pos); 337 } else if (digitRadix == 10 && 338 (reader.ch == 'e' || reader.ch == 'E' || 339 reader.ch == 'f' || reader.ch == 'F' || 340 reader.ch == 'd' || reader.ch == 'D')) { 341 scanFractionAndSuffix(pos); 342 } else { 343 if (!seenValidDigit) { 344 switch (radix) { 345 case 2: 346 lexError(pos, Errors.InvalidBinaryNumber); 347 break; 348 case 16: 349 lexError(pos, Errors.InvalidHexNumber); 350 break; 351 } 352 } 353 if (reader.ch == 'l' || reader.ch == 'L') { 354 reader.scanChar(); 355 tk = TokenKind.LONGLITERAL; 356 } else { 357 tk = TokenKind.INTLITERAL; 358 } 359 } 360 } 361 362 /** Read an identifier. 363 */ 364 private void scanIdent() { 365 boolean isJavaIdentifierPart; 366 char high; 367 reader.putChar(true); 368 do { 369 switch (reader.ch) { 370 case 'A': case 'B': case 'C': case 'D': case 'E': 371 case 'F': case 'G': case 'H': case 'I': case 'J': 372 case 'K': case 'L': case 'M': case 'N': case 'O': 373 case 'P': case 'Q': case 'R': case 'S': case 'T': 374 case 'U': case 'V': case 'W': case 'X': case 'Y': 375 case 'Z': 376 case 'a': case 'b': case 'c': case 'd': case 'e': 377 case 'f': case 'g': case 'h': case 'i': case 'j': 378 case 'k': case 'l': case 'm': case 'n': case 'o': 379 case 'p': case 'q': case 'r': case 's': case 't': 380 case 'u': case 'v': case 'w': case 'x': case 'y': 381 case 'z': 382 case '$': case '_': 383 case '0': case '1': case '2': case '3': case '4': 384 case '5': case '6': case '7': case '8': case '9': 385 break; 386 case '\u0000': case '\u0001': case '\u0002': case '\u0003': 387 case '\u0004': case '\u0005': case '\u0006': case '\u0007': 388 case '\u0008': case '\u000E': case '\u000F': case '\u0010': 389 case '\u0011': case '\u0012': case '\u0013': case '\u0014': 390 case '\u0015': case '\u0016': case '\u0017': 391 case '\u0018': case '\u0019': case '\u001B': 392 case '\u007F': 393 reader.scanChar(); 394 continue; 395 case '\u001A': // EOI is also a legal identifier part 396 if (reader.bp >= reader.buflen) { 397 name = reader.name(); 398 tk = tokens.lookupKind(name); 399 return; 400 } 401 reader.scanChar(); 402 continue; 403 default: 404 if (reader.ch < '\u0080') { 405 // all ASCII range chars already handled, above 406 isJavaIdentifierPart = false; 407 } else { 408 if (Character.isIdentifierIgnorable(reader.ch)) { 409 reader.scanChar(); 410 continue; 411 } else { 412 int codePoint = reader.peekSurrogates(); 413 if (codePoint >= 0) { 414 if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) { 415 reader.putChar(true); 416 } 417 } else { 418 isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch); 419 } 420 } 421 } 422 if (!isJavaIdentifierPart) { 423 name = reader.name(); 424 tk = tokens.lookupKind(name); 425 return; 426 } 427 } 428 reader.putChar(true); 429 } while (true); 430 } 431 432 /** Return true if reader.ch can be part of an operator. 433 */ 434 private boolean isSpecial(char ch) { 435 switch (ch) { 436 case '!': case '%': case '&': case '*': case '?': 437 case '+': case '-': case ':': case '<': case '=': 438 case '>': case '^': case '|': case '~': 439 case '@': 440 return true; 441 default: 442 return false; 443 } 444 } 445 446 /** Read longest possible sequence of special characters and convert 447 * to token. 448 */ 449 private void scanOperator() { 450 while (true) { 451 reader.putChar(false); 452 Name newname = reader.name(); 453 TokenKind tk1 = tokens.lookupKind(newname); 454 if (tk1 == TokenKind.IDENTIFIER) { 455 reader.sp--; 456 break; 457 } 458 tk = tk1; 459 reader.scanChar(); 460 if (!isSpecial(reader.ch)) break; 461 } 462 } 463 464 /** Read token. 465 */ 466 public Token readToken() { 467 468 reader.sp = 0; 469 name = null; 470 radix = 0; 471 472 int pos = 0; 473 int endPos = 0; 474 List<Comment> comments = null; 475 476 try { 477 loop: while (true) { 478 pos = reader.bp; 479 switch (reader.ch) { 480 case ' ': // (Spec 3.6) 481 case '\t': // (Spec 3.6) 482 case FF: // (Spec 3.6) 483 do { 484 reader.scanChar(); 485 } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF); 486 processWhiteSpace(pos, reader.bp); 487 break; 488 case LF: // (Spec 3.4) 489 reader.scanChar(); 490 processLineTerminator(pos, reader.bp); 491 break; 492 case CR: // (Spec 3.4) 493 reader.scanChar(); 494 if (reader.ch == LF) { 495 reader.scanChar(); 496 } 497 processLineTerminator(pos, reader.bp); 498 break; 499 case 'A': case 'B': case 'C': case 'D': case 'E': 500 case 'F': case 'G': case 'H': case 'I': case 'J': 501 case 'K': case 'L': case 'M': case 'N': case 'O': 502 case 'P': case 'Q': case 'R': case 'S': case 'T': 503 case 'U': case 'V': case 'W': case 'X': case 'Y': 504 case 'Z': 505 case 'a': case 'b': case 'c': case 'd': case 'e': 506 case 'f': case 'g': case 'h': case 'i': case 'j': 507 case 'k': case 'l': case 'm': case 'n': case 'o': 508 case 'p': case 'q': case 'r': case 's': case 't': 509 case 'u': case 'v': case 'w': case 'x': case 'y': 510 case 'z': 511 case '$': case '_': 512 scanIdent(); 513 break loop; 514 case '0': 515 reader.scanChar(); 516 if (reader.ch == 'x' || reader.ch == 'X') { 517 reader.scanChar(); 518 skipIllegalUnderscores(); 519 scanNumber(pos, 16); 520 } else if (reader.ch == 'b' || reader.ch == 'B') { 521 checkSourceLevel(pos, Feature.BINARY_LITERALS); 522 reader.scanChar(); 523 skipIllegalUnderscores(); 524 scanNumber(pos, 2); 525 } else { 526 reader.putChar('0'); 527 if (reader.ch == '_') { 528 int savePos = reader.bp; 529 do { 530 reader.scanChar(); 531 } while (reader.ch == '_'); 532 if (reader.digit(pos, 10) < 0) { 533 lexError(savePos, Errors.IllegalUnderscore); 534 } 535 } 536 scanNumber(pos, 8); 537 } 538 break loop; 539 case '1': case '2': case '3': case '4': 540 case '5': case '6': case '7': case '8': case '9': 541 scanNumber(pos, 10); 542 break loop; 543 case '.': 544 reader.scanChar(); 545 if (reader.digit(pos, 10) >= 0) { 546 reader.putChar('.'); 547 scanFractionAndSuffix(pos); 548 } else if (reader.ch == '.') { 549 int savePos = reader.bp; 550 reader.putChar('.'); reader.putChar('.', true); 551 if (reader.ch == '.') { 552 reader.scanChar(); 553 reader.putChar('.'); 554 tk = TokenKind.ELLIPSIS; 555 } else { 556 lexError(savePos, Errors.IllegalDot); 557 } 558 } else { 559 tk = TokenKind.DOT; 560 } 561 break loop; 562 case ',': 563 reader.scanChar(); tk = TokenKind.COMMA; break loop; 564 case ';': 565 reader.scanChar(); tk = TokenKind.SEMI; break loop; 566 case '(': 567 reader.scanChar(); tk = TokenKind.LPAREN; break loop; 568 case ')': 569 reader.scanChar(); tk = TokenKind.RPAREN; break loop; 570 case '[': 571 reader.scanChar(); tk = TokenKind.LBRACKET; break loop; 572 case ']': 573 reader.scanChar(); tk = TokenKind.RBRACKET; break loop; 574 case '{': 575 reader.scanChar(); tk = TokenKind.LBRACE; break loop; 576 case '}': 577 reader.scanChar(); tk = TokenKind.RBRACE; break loop; 578 case '/': 579 reader.scanChar(); 580 if (reader.ch == '/') { 581 do { 582 reader.scanCommentChar(); 583 } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen); 584 if (reader.bp < reader.buflen) { 585 comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE)); 586 } 587 break; 588 } else if (reader.ch == '*') { 589 boolean isEmpty = false; 590 reader.scanChar(); 591 CommentStyle style; 592 if (reader.ch == '*') { 593 style = CommentStyle.JAVADOC; 594 reader.scanCommentChar(); 595 if (reader.ch == '/') { 596 isEmpty = true; 597 } 598 } else { 599 style = CommentStyle.BLOCK; 600 } 601 while (!isEmpty && reader.bp < reader.buflen) { 602 if (reader.ch == '*') { 603 reader.scanChar(); 604 if (reader.ch == '/') break; 605 } else { 606 reader.scanCommentChar(); 607 } 608 } 609 if (reader.ch == '/') { 610 reader.scanChar(); 611 comments = addComment(comments, processComment(pos, reader.bp, style)); 612 break; 613 } else { 614 lexError(pos, Errors.UnclosedComment); 615 break loop; 616 } 617 } else if (reader.ch == '=') { 618 tk = TokenKind.SLASHEQ; 619 reader.scanChar(); 620 } else { 621 tk = TokenKind.SLASH; 622 } 623 break loop; 624 case '\'': 625 reader.scanChar(); 626 if (reader.ch == '\'') { 627 lexError(pos, Errors.EmptyCharLit); 628 reader.scanChar(); 629 } else { 630 if (reader.ch == CR || reader.ch == LF) 631 lexError(pos, Errors.IllegalLineEndInCharLit); 632 scanLitChar(pos); 633 if (reader.ch == '\'') { 634 reader.scanChar(); 635 tk = TokenKind.CHARLITERAL; 636 } else { 637 lexError(pos, Errors.UnclosedCharLit); 638 } 639 } 640 break loop; 641 case '\"': 642 reader.scanChar(); 643 while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen) 644 scanLitChar(pos); 645 if (reader.ch == '\"') { 646 tk = TokenKind.STRINGLITERAL; 647 reader.scanChar(); 648 } else { 649 lexError(pos, Errors.UnclosedStrLit); 650 } 651 break loop; 652 default: 653 if (isSpecial(reader.ch)) { 654 scanOperator(); 655 } else { 656 boolean isJavaIdentifierStart; 657 int codePoint = -1; 658 if (reader.ch < '\u0080') { 659 // all ASCII range chars already handled, above 660 isJavaIdentifierStart = false; 661 } else { 662 codePoint = reader.peekSurrogates(); 663 if (codePoint >= 0) { 664 if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) { 665 reader.putChar(true); 666 } 667 } else { 668 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch); 669 } 670 } 671 if (isJavaIdentifierStart) { 672 scanIdent(); 673 } else if (reader.digit(pos, 10) >= 0) { 674 scanNumber(pos, 10); 675 } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5 676 tk = TokenKind.EOF; 677 pos = reader.realLength; 678 } else { 679 String arg; 680 681 if (codePoint >= 0) { 682 char high = reader.ch; 683 reader.scanChar(); 684 arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch); 685 } else { 686 arg = (32 < reader.ch && reader.ch < 127) ? 687 String.format("%s", reader.ch) : 688 String.format("\\u%04x", (int)reader.ch); 689 } 690 lexError(pos, Errors.IllegalChar(arg)); 691 reader.scanChar(); 692 } 693 } 694 break loop; 695 } 696 } 697 endPos = reader.bp; 698 switch (tk.tag) { 699 case DEFAULT: return new Token(tk, pos, endPos, comments); 700 case NAMED: return new NamedToken(tk, pos, endPos, name, comments); 701 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments); 702 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments); 703 default: throw new AssertionError(); 704 } 705 } 706 finally { 707 if (scannerDebug) { 708 System.out.println("nextToken(" + pos 709 + "," + endPos + ")=|" + 710 new String(reader.getRawCharacters(pos, endPos)) 711 + "|"); 712 } 713 } 714 } 715 //where 716 List<Comment> addComment(List<Comment> comments, Comment comment) { 717 return comments == null ? 718 List.of(comment) : 719 comments.prepend(comment); 720 } 721 722 /** Return the position where a lexical error occurred; 723 */ 724 public int errPos() { 725 return errPos; 726 } 727 728 /** Set the position where a lexical error occurred; 729 */ 730 public void errPos(int pos) { 731 errPos = pos; 732 } 733 734 /** 735 * Called when a complete comment has been scanned. pos and endPos 736 * will mark the comment boundary. 737 */ 738 protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) { 739 if (scannerDebug) 740 System.out.println("processComment(" + pos 741 + "," + endPos + "," + style + ")=|" 742 + new String(reader.getRawCharacters(pos, endPos)) 743 + "|"); 744 char[] buf = reader.getRawCharacters(pos, endPos); 745 return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style); 746 } 747 748 /** 749 * Called when a complete whitespace run has been scanned. pos and endPos 750 * will mark the whitespace boundary. 751 */ 752 protected void processWhiteSpace(int pos, int endPos) { 753 if (scannerDebug) 754 System.out.println("processWhitespace(" + pos 755 + "," + endPos + ")=|" + 756 new String(reader.getRawCharacters(pos, endPos)) 757 + "|"); 758 } 759 760 /** 761 * Called when a line terminator has been processed. 762 */ 763 protected void processLineTerminator(int pos, int endPos) { 764 if (scannerDebug) 765 System.out.println("processTerminator(" + pos 766 + "," + endPos + ")=|" + 767 new String(reader.getRawCharacters(pos, endPos)) 768 + "|"); 769 } 770 771 /** Build a map for translating between line numbers and 772 * positions in the input. 773 * 774 * @return a LineMap */ 775 public Position.LineMap getLineMap() { 776 return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false); 777 } 778 779 780 /** 781 * Scan a documentation comment; determine if a deprecated tag is present. 782 * Called once the initial /, * have been skipped, positioned at the second * 783 * (which is treated as the beginning of the first line). 784 * Stops positioned at the closing '/'. 785 */ 786 protected static class BasicComment<U extends UnicodeReader> implements Comment { 787 788 CommentStyle cs; 789 U comment_reader; 790 791 protected boolean deprecatedFlag = false; 792 protected boolean scanned = false; 793 794 protected BasicComment(U comment_reader, CommentStyle cs) { 795 this.comment_reader = comment_reader; 796 this.cs = cs; 797 } 798 799 public String getText() { 800 return null; 801 } 802 803 public int getSourcePos(int pos) { 804 return -1; 805 } 806 807 public CommentStyle getStyle() { 808 return cs; 809 } 810 811 public boolean isDeprecated() { 812 if (!scanned && cs == CommentStyle.JAVADOC) { 813 scanDocComment(); 814 } 815 return deprecatedFlag; 816 } 817 818 @SuppressWarnings("fallthrough") 819 protected void scanDocComment() { 820 try { 821 boolean deprecatedPrefix = false; 822 823 comment_reader.bp += 3; // '/**' 824 comment_reader.ch = comment_reader.buf[comment_reader.bp]; 825 826 forEachLine: 827 while (comment_reader.bp < comment_reader.buflen) { 828 829 // Skip optional WhiteSpace at beginning of line 830 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) { 831 comment_reader.scanCommentChar(); 832 } 833 834 // Skip optional consecutive Stars 835 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') { 836 comment_reader.scanCommentChar(); 837 if (comment_reader.ch == '/') { 838 return; 839 } 840 } 841 842 // Skip optional WhiteSpace after Stars 843 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) { 844 comment_reader.scanCommentChar(); 845 } 846 847 deprecatedPrefix = false; 848 // At beginning of line in the JavaDoc sense. 849 if (!deprecatedFlag) { 850 String deprecated = "@deprecated"; 851 int i = 0; 852 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) { 853 comment_reader.scanCommentChar(); 854 i++; 855 if (i == deprecated.length()) { 856 deprecatedPrefix = true; 857 break; 858 } 859 } 860 } 861 862 if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) { 863 if (Character.isWhitespace(comment_reader.ch)) { 864 deprecatedFlag = true; 865 } else if (comment_reader.ch == '*') { 866 comment_reader.scanCommentChar(); 867 if (comment_reader.ch == '/') { 868 deprecatedFlag = true; 869 return; 870 } 871 } 872 } 873 874 // Skip rest of line 875 while (comment_reader.bp < comment_reader.buflen) { 876 switch (comment_reader.ch) { 877 case '*': 878 comment_reader.scanCommentChar(); 879 if (comment_reader.ch == '/') { 880 return; 881 } 882 break; 883 case CR: // (Spec 3.4) 884 comment_reader.scanCommentChar(); 885 if (comment_reader.ch != LF) { 886 continue forEachLine; 887 } 888 /* fall through to LF case */ 889 case LF: // (Spec 3.4) 890 comment_reader.scanCommentChar(); 891 continue forEachLine; 892 default: 893 comment_reader.scanCommentChar(); 894 } 895 } // rest of line 896 } // forEachLine 897 return; 898 } finally { 899 scanned = true; 900 } 901 } 902 } 903 }