1 /* 2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.tools.javac.parser; 27 28 import com.sun.tools.javac.code.Preview; 29 import com.sun.tools.javac.code.Source; 30 import com.sun.tools.javac.code.Source.Feature; 31 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; 32 import com.sun.tools.javac.resources.CompilerProperties.Errors; 33 import com.sun.tools.javac.util.*; 34 import com.sun.tools.javac.util.JCDiagnostic.DiagnosticFlag; 35 36 import java.nio.CharBuffer; 37 38 import static com.sun.tools.javac.parser.Tokens.*; 39 import static com.sun.tools.javac.util.LayoutCharacters.*; 40 41 /** The lexical analyzer maps an input stream consisting of 42 * ASCII characters and Unicode escapes into a token sequence. 
43 * 44 * <p><b>This is NOT part of any supported API. 45 * If you write code that depends on this, you do so at your own risk. 46 * This code and its internal interfaces are subject to change or 47 * deletion without notice.</b> 48 */ 49 public class JavaTokenizer { 50 51 private static final boolean scannerDebug = false; 52 53 /** The source language setting. 54 */ 55 private Source source; 56 57 /** The preview language setting. */ 58 private Preview preview; 59 60 /** The log to be used for error reporting. 61 */ 62 private final Log log; 63 64 /** The token factory. */ 65 private final Tokens tokens; 66 67 /** The token kind, set by nextToken(). 68 */ 69 protected TokenKind tk; 70 71 /** The token's radix, set by nextToken(). 72 */ 73 protected int radix; 74 75 /** The token's name, set by nextToken(). 76 */ 77 protected Name name; 78 79 /** The position where a lexical error occurred; 80 */ 81 protected int errPos = Position.NOPOS; 82 83 /** The Unicode reader (low-level stream reader). 84 */ 85 protected UnicodeReader reader; 86 87 protected ScannerFactory fac; 88 89 private static final boolean hexFloatsWork = hexFloatsWork(); 90 private static boolean hexFloatsWork() { 91 try { 92 Float.valueOf("0x1.0p1"); 93 return true; 94 } catch (NumberFormatException ex) { 95 return false; 96 } 97 } 98 99 /** 100 * Create a scanner from the input array. This method might 101 * modify the array. To avoid copying the input array, ensure 102 * that {@code inputLength < input.length} or 103 * {@code input[input.length -1]} is a white space character. 104 * 105 * @param fac the factory which created this Scanner 106 * @param buf the input, might be modified 107 * Must be positive and less than or equal to input.length. 
108 */ 109 protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) { 110 this(fac, new UnicodeReader(fac, buf)); 111 } 112 113 protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) { 114 this(fac, new UnicodeReader(fac, buf, inputLength)); 115 } 116 117 protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) { 118 this.fac = fac; 119 this.log = fac.log; 120 this.tokens = fac.tokens; 121 this.source = fac.source; 122 this.preview = fac.preview; 123 this.reader = reader; 124 } 125 126 protected void checkSourceLevel(int pos, Feature feature) { 127 if (preview.isPreview(feature) && !preview.isEnabled()) { 128 //preview feature without --preview flag, error 129 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature)); 130 } else if (!feature.allowedInSource(source)) { 131 //incompatible source level, error 132 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name)); 133 } else if (preview.isPreview(feature)) { 134 //use of preview feature, warn 135 preview.warnPreview(pos, feature); 136 } 137 } 138 139 /** Report an error at the given position using the provided arguments. 140 */ 141 protected void lexError(int pos, JCDiagnostic.Error key) { 142 log.error(pos, key); 143 tk = TokenKind.ERROR; 144 errPos = pos; 145 } 146 147 protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) { 148 log.error(flags, pos, key); 149 tk = TokenKind.ERROR; 150 errPos = pos; 151 } 152 153 /** Read next character in character or string literal and copy into sbuf. 
154 */ 155 private void scanLitChar(int pos) { 156 if (reader.ch == '\\') { 157 if (reader.peekChar() == '\\' && !reader.isUnicode()) { 158 reader.skipChar(); 159 reader.putChar('\\', true); 160 } else { 161 reader.scanChar(); 162 switch (reader.ch) { 163 case '0': case '1': case '2': case '3': 164 case '4': case '5': case '6': case '7': 165 char leadch = reader.ch; 166 int oct = reader.digit(pos, 8); 167 reader.scanChar(); 168 if ('0' <= reader.ch && reader.ch <= '7') { 169 oct = oct * 8 + reader.digit(pos, 8); 170 reader.scanChar(); 171 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { 172 oct = oct * 8 + reader.digit(pos, 8); 173 reader.scanChar(); 174 } 175 } 176 reader.putChar((char)oct); 177 break; 178 case 'b': 179 reader.putChar('\b', true); break; 180 case 't': 181 reader.putChar('\t', true); break; 182 case 'n': 183 reader.putChar('\n', true); break; 184 case 'f': 185 reader.putChar('\f', true); break; 186 case 'r': 187 reader.putChar('\r', true); break; 188 case '\'': 189 reader.putChar('\'', true); break; 190 case '\"': 191 reader.putChar('\"', true); break; 192 case '\\': 193 reader.putChar('\\', true); break; 194 default: 195 lexError(reader.bp, Errors.IllegalEscChar); 196 } 197 } 198 } else if (reader.bp != reader.buflen) { 199 reader.putChar(true); 200 } 201 } 202 203 private void scanDigits(int pos, int digitRadix) { 204 char saveCh; 205 int savePos; 206 do { 207 if (reader.ch != '_') { 208 reader.putChar(false); 209 } 210 saveCh = reader.ch; 211 savePos = reader.bp; 212 reader.scanChar(); 213 } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_'); 214 if (saveCh == '_') 215 lexError(savePos, Errors.IllegalUnderscore); 216 } 217 218 /** Read fractional part of hexadecimal floating point number. 
219 */ 220 private void scanHexExponentAndSuffix(int pos) { 221 if (reader.ch == 'p' || reader.ch == 'P') { 222 reader.putChar(true); 223 skipIllegalUnderscores(); 224 if (reader.ch == '+' || reader.ch == '-') { 225 reader.putChar(true); 226 } 227 skipIllegalUnderscores(); 228 if (reader.digit(pos, 10) >= 0) { 229 scanDigits(pos, 10); 230 if (!hexFloatsWork) 231 lexError(pos, Errors.UnsupportedCrossFpLit); 232 } else 233 lexError(pos, Errors.MalformedFpLit); 234 } else { 235 lexError(pos, Errors.MalformedFpLit); 236 } 237 if (reader.ch == 'f' || reader.ch == 'F') { 238 reader.putChar(true); 239 tk = TokenKind.FLOATLITERAL; 240 radix = 16; 241 } else { 242 if (reader.ch == 'd' || reader.ch == 'D') { 243 reader.putChar(true); 244 } 245 tk = TokenKind.DOUBLELITERAL; 246 radix = 16; 247 } 248 } 249 250 /** Read fractional part of floating point number. 251 */ 252 private void scanFraction(int pos) { 253 skipIllegalUnderscores(); 254 if (reader.digit(pos, 10) >= 0) { 255 scanDigits(pos, 10); 256 } 257 int sp1 = reader.sp; 258 if (reader.ch == 'e' || reader.ch == 'E') { 259 reader.putChar(true); 260 skipIllegalUnderscores(); 261 if (reader.ch == '+' || reader.ch == '-') { 262 reader.putChar(true); 263 } 264 skipIllegalUnderscores(); 265 if (reader.digit(pos, 10) >= 0) { 266 scanDigits(pos, 10); 267 return; 268 } 269 lexError(pos, Errors.MalformedFpLit); 270 reader.sp = sp1; 271 } 272 } 273 274 /** Read fractional part and 'd' or 'f' suffix of floating point number. 275 */ 276 private void scanFractionAndSuffix(int pos) { 277 radix = 10; 278 scanFraction(pos); 279 if (reader.ch == 'f' || reader.ch == 'F') { 280 reader.putChar(true); 281 tk = TokenKind.FLOATLITERAL; 282 } else { 283 if (reader.ch == 'd' || reader.ch == 'D') { 284 reader.putChar(true); 285 } 286 tk = TokenKind.DOUBLELITERAL; 287 } 288 } 289 290 /** Read fractional part and 'd' or 'f' suffix of floating point number. 
291 */ 292 private void scanHexFractionAndSuffix(int pos, boolean seendigit) { 293 radix = 16; 294 Assert.check(reader.ch == '.'); 295 reader.putChar(true); 296 skipIllegalUnderscores(); 297 if (reader.digit(pos, 16) >= 0) { 298 seendigit = true; 299 scanDigits(pos, 16); 300 } 301 if (!seendigit) 302 lexError(pos, Errors.InvalidHexNumber); 303 else 304 scanHexExponentAndSuffix(pos); 305 } 306 307 private void skipIllegalUnderscores() { 308 if (reader.ch == '_') { 309 lexError(reader.bp, Errors.IllegalUnderscore); 310 while (reader.ch == '_') 311 reader.scanChar(); 312 } 313 } 314 315 /** Read a number. 316 * @param radix The radix of the number; one of 2, 8, 10, 16. 317 */ 318 private void scanNumber(int pos, int radix) { 319 // for octal, allow base-10 digit in case it's a float literal 320 this.radix = radix; 321 int digitRadix = (radix == 8 ? 10 : radix); 322 int firstDigit = reader.digit(pos, Math.max(10, digitRadix)); 323 boolean seendigit = firstDigit >= 0; 324 boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix; 325 if (seendigit) { 326 scanDigits(pos, digitRadix); 327 } 328 if (radix == 16 && reader.ch == '.') { 329 scanHexFractionAndSuffix(pos, seendigit); 330 } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) { 331 scanHexExponentAndSuffix(pos); 332 } else if (digitRadix == 10 && reader.ch == '.') { 333 reader.putChar(true); 334 scanFractionAndSuffix(pos); 335 } else if (digitRadix == 10 && 336 (reader.ch == 'e' || reader.ch == 'E' || 337 reader.ch == 'f' || reader.ch == 'F' || 338 reader.ch == 'd' || reader.ch == 'D')) { 339 scanFractionAndSuffix(pos); 340 } else { 341 if (!seenValidDigit) { 342 switch (radix) { 343 case 2: 344 lexError(pos, Errors.InvalidBinaryNumber); 345 break; 346 case 16: 347 lexError(pos, Errors.InvalidHexNumber); 348 break; 349 } 350 } 351 if (reader.ch == 'l' || reader.ch == 'L') { 352 reader.scanChar(); 353 tk = TokenKind.LONGLITERAL; 354 } else { 355 tk = TokenKind.INTLITERAL; 356 } 
357 } 358 } 359 360 /** Read an identifier. 361 */ 362 private void scanIdent() { 363 boolean isJavaIdentifierPart; 364 char high; 365 reader.putChar(true); 366 do { 367 switch (reader.ch) { 368 case 'A': case 'B': case 'C': case 'D': case 'E': 369 case 'F': case 'G': case 'H': case 'I': case 'J': 370 case 'K': case 'L': case 'M': case 'N': case 'O': 371 case 'P': case 'Q': case 'R': case 'S': case 'T': 372 case 'U': case 'V': case 'W': case 'X': case 'Y': 373 case 'Z': 374 case 'a': case 'b': case 'c': case 'd': case 'e': 375 case 'f': case 'g': case 'h': case 'i': case 'j': 376 case 'k': case 'l': case 'm': case 'n': case 'o': 377 case 'p': case 'q': case 'r': case 's': case 't': 378 case 'u': case 'v': case 'w': case 'x': case 'y': 379 case 'z': 380 case '$': case '_': 381 case '0': case '1': case '2': case '3': case '4': 382 case '5': case '6': case '7': case '8': case '9': 383 break; 384 case '\u0000': case '\u0001': case '\u0002': case '\u0003': 385 case '\u0004': case '\u0005': case '\u0006': case '\u0007': 386 case '\u0008': case '\u000E': case '\u000F': case '\u0010': 387 case '\u0011': case '\u0012': case '\u0013': case '\u0014': 388 case '\u0015': case '\u0016': case '\u0017': 389 case '\u0018': case '\u0019': case '\u001B': 390 case '\u007F': 391 reader.scanChar(); 392 continue; 393 case '\u001A': // EOI is also a legal identifier part 394 if (reader.bp >= reader.buflen) { 395 name = reader.name(); 396 tk = tokens.lookupKind(name); 397 return; 398 } 399 reader.scanChar(); 400 continue; 401 default: 402 if (reader.ch < '\u0080') { 403 // all ASCII range chars already handled, above 404 isJavaIdentifierPart = false; 405 } else { 406 if (Character.isIdentifierIgnorable(reader.ch)) { 407 reader.scanChar(); 408 continue; 409 } else { 410 int codePoint = reader.peekSurrogates(); 411 if (codePoint >= 0) { 412 if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) { 413 reader.putChar(true); 414 } 415 } else { 416 isJavaIdentifierPart = 
Character.isJavaIdentifierPart(reader.ch); 417 } 418 } 419 } 420 if (!isJavaIdentifierPart) { 421 name = reader.name(); 422 tk = tokens.lookupKind(name); 423 return; 424 } 425 } 426 reader.putChar(true); 427 } while (true); 428 } 429 430 /** Return true if reader.ch can be part of an operator. 431 */ 432 private boolean isSpecial(char ch) { 433 switch (ch) { 434 case '!': case '%': case '&': case '*': case '?': 435 case '+': case '-': case ':': case '<': case '=': 436 case '>': case '^': case '|': case '~': 437 case '@': 438 return true; 439 default: 440 return false; 441 } 442 } 443 444 /** Read longest possible sequence of special characters and convert 445 * to token. 446 */ 447 private void scanOperator() { 448 while (true) { 449 reader.putChar(false); 450 Name newname = reader.name(); 451 TokenKind tk1 = tokens.lookupKind(newname); 452 if (tk1 == TokenKind.IDENTIFIER) { 453 reader.sp--; 454 break; 455 } 456 tk = tk1; 457 reader.scanChar(); 458 if (!isSpecial(reader.ch)) break; 459 } 460 } 461 462 /** Read token. 
*/
    public Token readToken() {

        // Reset per-token state: reader.sp, the token name and the radix.
        reader.sp = 0;
        name = null;
        radix = 0;

        int pos = 0;
        int endPos = 0;
        // Comments seen before the token; stays null when there are none.
        List<Comment> comments = null;

        try {
            // Each round looks at one character. Whitespace, line
            // terminators and comments 'break' out of the switch only and
            // go round again; anything that sets tk leaves via 'break loop'.
            loop: while (true) {
                pos = reader.bp;
                switch (reader.ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    do {
                        reader.scanChar();
                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                    processWhiteSpace(pos, reader.bp);
                    break;
                case LF: // (Spec 3.4)
                    reader.scanChar();
                    processLineTerminator(pos, reader.bp);
                    break;
                case CR: // (Spec 3.4)
                    reader.scanChar();
                    // CR LF counts as a single line terminator.
                    if (reader.ch == LF) {
                        reader.scanChar();
                    }
                    processLineTerminator(pos, reader.bp);
                    break;
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    scanIdent();
                    break loop;
                case '0':
                    // Leading zero: the next character selects hex ("0x"),
                    // binary ("0b") or octal/decimal scanning.
                    reader.scanChar();
                    if (reader.ch == 'x' || reader.ch == 'X') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 16);
                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 2);
                    } else {
                        reader.putChar('0');
                        if (reader.ch == '_') {
                            // Underscores after the zero are skipped here;
                            // they are an error unless a digit follows.
                            int savePos = reader.bp;
                            do {
                                reader.scanChar();
                            } while (reader.ch == '_');
                            if (reader.digit(pos, 10) < 0) {
                                lexError(savePos, Errors.IllegalUnderscore);
                            }
                        }
                        scanNumber(pos, 8);
                    }
                    break loop;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(pos, 10);
                    break loop;
                case '.':
                    // Either the start of a fraction, an ellipsis, or a
                    // plain dot; two dots alone are an error.
                    reader.scanChar();
                    if (reader.digit(pos, 10) >= 0) {
                        reader.putChar('.');
                        scanFractionAndSuffix(pos);
                    } else if (reader.ch == '.') {
                        int savePos = reader.bp;
                        reader.putChar('.'); reader.putChar('.', true);
                        if (reader.ch == '.') {
                            reader.scanChar();
                            reader.putChar('.');
                            tk = TokenKind.ELLIPSIS;
                        } else {
                            lexError(savePos, Errors.IllegalDot);
                        }
                    } else {
                        tk = TokenKind.DOT;
                    }
                    break loop;
                case ',':
                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
                case ';':
                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
                case '(':
                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
                case ')':
                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
                case '[':
                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
                case ']':
                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
                case '{':
                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
                case '}':
                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
                case '/':
                    reader.scanChar();
                    if (reader.ch == '/') {
                        // Line comment: runs up to CR, LF or end of input.
                        do {
                            reader.scanCommentChar();
                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        // Only recorded when it ended before the end of input.
                        if (reader.bp < reader.buflen) {
                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                        }
                        break;
                    } else if (reader.ch == '*') {
                        // Block comment; a second '*' makes it a doc comment.
                        // isEmpty is set when that second '*' is directly
                        // followed by '/', i.e. the comment has no body.
                        boolean isEmpty = false;
                        reader.scanChar();
                        CommentStyle style;
                        if (reader.ch == '*') {
                            style = CommentStyle.JAVADOC;
                            reader.scanCommentChar();
                            if (reader.ch == '/') {
                                isEmpty = true;
                            }
                        } else {
                            style = CommentStyle.BLOCK;
                        }
                        // Look for the closing star-slash pair.
                        while (!isEmpty && reader.bp < reader.buflen) {
                            if (reader.ch == '*') {
                                reader.scanChar();
                                if (reader.ch == '/') break;
                            } else {
                                reader.scanCommentChar();
                            }
                        }
                        if (reader.ch == '/') {
                            reader.scanChar();
                            comments = addComment(comments, processComment(pos, reader.bp, style));
                            break;
                        } else {
                            // Input ended inside the comment.
                            lexError(pos, Errors.UnclosedComment);
                            break loop;
                        }
                    } else if (reader.ch == '=') {
                        tk = TokenKind.SLASHEQ;
                        reader.scanChar();
                    } else {
                        tk = TokenKind.SLASH;
                    }
                    break loop;
                case '\'':
                    // Character literal: exactly one (possibly escaped)
                    // character between the quotes.
                    reader.scanChar();
                    if (reader.ch == '\'') {
                        lexError(pos, Errors.EmptyCharLit);
                        reader.scanChar();
                    } else {
                        if (reader.ch == CR || reader.ch == LF)
                            lexError(pos, Errors.IllegalLineEndInCharLit);
                        scanLitChar(pos);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
                        } else {
                            lexError(pos, Errors.UnclosedCharLit);
                        }
                    }
                    break loop;
                case '\"':
                    // String literal: must close before CR, LF or end of input.
                    reader.scanChar();
                    while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
                        scanLitChar(pos);
                    if (reader.ch == '\"') {
                        tk = TokenKind.STRINGLITERAL;
                        reader.scanChar();
                    } else {
                        lexError(pos, Errors.UnclosedStrLit);
                    }
                    break loop;
                default:
                    if (isSpecial(reader.ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        // Stays -1 unless peekSurrogates() reports a code point.
                        int codePoint = -1;
                        if (reader.ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            codePoint = reader.peekSurrogates();
                            if (codePoint >= 0) {
                                // Surrogate pair: copy the first half now when
                                // the code point can start an identifier.
                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
                                    reader.putChar(true);
                                }
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (reader.digit(pos, 10) >= 0) {
                            scanNumber(pos, 10);
                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            // End of input, or EOI as the very last character.
                            tk = TokenKind.EOF;
                            pos = reader.realLength;
                        } else {
                            // Illegal character: build a printable form of it
                            // for the error message.
                            String arg;

                            if (codePoint >= 0) {
                                char high = reader.ch;
                                reader.scanChar();
                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
                            } else {
                                arg = (32 < reader.ch && reader.ch < 127) ?
                                                String.format("%s", reader.ch) :
                                                String.format("\\u%04x", (int)reader.ch);
                            }
                            lexError(pos, Errors.IllegalChar(arg));
                            reader.scanChar();
                        }
                    }
                    break loop;
                }
            }
            endPos = reader.bp;
            // The token class is chosen by the tag of the token kind.
            switch (tk.tag) {
                case DEFAULT: return new Token(tk, pos, endPos, comments);
                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
                case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                default: throw new AssertionError();
            }
        }
        finally {
            // Runs on every exit path, including the returns above.
            if (scannerDebug) {
                    System.out.println("nextToken(" + pos
                                       + "," + endPos + ")=|" +
                                       new String(reader.getRawCharacters(pos, endPos))
                                       + "|");
            }
        }
    }
    //where
        /** Add a comment to the list collected for the current token,
         *  creating the list on first use. Later comments are added with
         *  {@code prepend}; presumably that places them at the front of
         *  the list - confirm against javac's {@code List}.
         */
        List<Comment> addComment(List<Comment> comments, Comment comment) {
            return comments == null ?
                    List.of(comment) :
                    comments.prepend(comment);
        }

    /** Return the position where a lexical error occurred.
     */
    public int errPos() {
        return errPos;
    }

    /** Set the position where a lexical error occurred.
     */
    public void errPos(int pos) {
        errPos = pos;
    }

    /**
     * Called when a complete comment has been scanned. pos and endPos
     * will mark the comment boundary.
734 */ 735 protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) { 736 if (scannerDebug) 737 System.out.println("processComment(" + pos 738 + "," + endPos + "," + style + ")=|" 739 + new String(reader.getRawCharacters(pos, endPos)) 740 + "|"); 741 char[] buf = reader.getRawCharacters(pos, endPos); 742 return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style); 743 } 744 745 /** 746 * Called when a complete whitespace run has been scanned. pos and endPos 747 * will mark the whitespace boundary. 748 */ 749 protected void processWhiteSpace(int pos, int endPos) { 750 if (scannerDebug) 751 System.out.println("processWhitespace(" + pos 752 + "," + endPos + ")=|" + 753 new String(reader.getRawCharacters(pos, endPos)) 754 + "|"); 755 } 756 757 /** 758 * Called when a line terminator has been processed. 759 */ 760 protected void processLineTerminator(int pos, int endPos) { 761 if (scannerDebug) 762 System.out.println("processTerminator(" + pos 763 + "," + endPos + ")=|" + 764 new String(reader.getRawCharacters(pos, endPos)) 765 + "|"); 766 } 767 768 /** Build a map for translating between line numbers and 769 * positions in the input. 770 * 771 * @return a LineMap */ 772 public Position.LineMap getLineMap() { 773 return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false); 774 } 775 776 777 /** 778 * Scan a documentation comment; determine if a deprecated tag is present. 779 * Called once the initial /, * have been skipped, positioned at the second * 780 * (which is treated as the beginning of the first line). 781 * Stops positioned at the closing '/'. 
*/
    protected static class BasicComment<U extends UnicodeReader> implements Comment {

        /** The style of this comment (line, block or javadoc). */
        CommentStyle cs;
        /** Reader positioned over the characters of this comment. */
        U comment_reader;

        /** Set by scanDocComment() once an "@deprecated" tag has been found. */
        protected boolean deprecatedFlag = false;
        /** True once scanDocComment() has run, so the scan happens only once. */
        protected boolean scanned = false;

        /** Create a comment over the given reader.
         *
         *  @param comment_reader reader over the comment's characters
         *  @param cs             the style of the comment
         */
        protected BasicComment(U comment_reader, CommentStyle cs) {
            this.comment_reader = comment_reader;
            this.cs = cs;
        }

        /** This basic implementation keeps no text and always returns null. */
        public String getText() {
            return null;
        }

        /** This basic implementation has no position mapping and always returns -1. */
        public int getSourcePos(int pos) {
            return -1;
        }

        /** Return the style this comment was created with. */
        public CommentStyle getStyle() {
            return cs;
        }

        /** Return whether the comment carries a deprecated tag. A javadoc
         *  comment is scanned on the first call; other styles are never
         *  scanned and report false unless the flag was set elsewhere.
         */
        public boolean isDeprecated() {
            if (!scanned && cs == CommentStyle.JAVADOC) {
                scanDocComment();
            }
            return deprecatedFlag;
        }

        /** Scan the comment line by line and set {@code deprecatedFlag}
         *  when a line, after leading whitespace and stars, starts with
         *  "@deprecated" followed by whitespace or by the end of the
         *  comment. {@code scanned} is set on every exit path.
         */
        @SuppressWarnings("fallthrough")
        protected void scanDocComment() {
            try {
                boolean deprecatedPrefix = false;

                comment_reader.bp += 3; // '/**'
                comment_reader.ch = comment_reader.buf[comment_reader.bp];

                forEachLine:
                while (comment_reader.bp < comment_reader.buflen) {

                    // Skip optional WhiteSpace at beginning of line
                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
                        comment_reader.scanCommentChar();
                    }

                    // Skip optional consecutive Stars
                    while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
                        comment_reader.scanCommentChar();
                        // A '/' right after a star closes the comment.
                        if (comment_reader.ch == '/') {
                            return;
                        }
                    }

                    // Skip optional WhiteSpace after Stars
                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
                        comment_reader.scanCommentChar();
                    }

                    deprecatedPrefix = false;
                    // At beginning of line in the JavaDoc sense.
                    if (!deprecatedFlag) {
                        // Match the tag character by character; a partial
                        // match leaves deprecatedPrefix false.
                        String deprecated = "@deprecated";
                        int i = 0;
                        while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
                            comment_reader.scanCommentChar();
                            i++;
                            if (i == deprecated.length()) {
                                deprecatedPrefix = true;
                                break;
                            }
                        }
                    }

                    // The tag only counts when followed by whitespace or by
                    // the end of the comment, not by further word characters.
                    if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
                        if (Character.isWhitespace(comment_reader.ch)) {
                            deprecatedFlag = true;
                        } else if (comment_reader.ch == '*') {
                            comment_reader.scanCommentChar();
                            if (comment_reader.ch == '/') {
                                deprecatedFlag = true;
                                return;
                            }
                        }
                    }

                    // Skip rest of line
                    while (comment_reader.bp < comment_reader.buflen) {
                        switch (comment_reader.ch) {
                            case '*':
                                comment_reader.scanCommentChar();
                                if (comment_reader.ch == '/') {
                                    return;
                                }
                                break;
                            case CR: // (Spec 3.4)
                                comment_reader.scanCommentChar();
                                // A lone CR ends the line; CR LF falls
                                // through so the LF is consumed as well.
                                if (comment_reader.ch != LF) {
                                    continue forEachLine;
                                }
                            /* fall through to LF case */
                            case LF: // (Spec 3.4)
                                comment_reader.scanCommentChar();
                                continue forEachLine;
                            default:
                                comment_reader.scanCommentChar();
                        }
                    } // rest of line
                } // forEachLine
                return;
            } finally {
                scanned = true;
            }
        }
    }
}