1 /* 2 * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Sun designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Sun in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 22 * CA 95054 USA or visit www.sun.com if you need additional information or 23 * have any questions. 24 */ 25 26 package com.sun.tools.javac.parser; 27 28 import java.nio.*; 29 30 import com.sun.tools.javac.code.Source; 31 import com.sun.tools.javac.file.JavacFileManager; 32 import com.sun.tools.javac.util.*; 33 34 35 import static com.sun.tools.javac.parser.Token.*; 36 import static com.sun.tools.javac.util.LayoutCharacters.*; 37 38 /** The lexical analyzer maps an input stream consisting of 39 * ASCII characters and Unicode escapes into a token sequence. 40 * 41 * <p><b>This is NOT part of any API supported by Sun Microsystems. If 42 * you write code that depends on this, you do so at your own risk. 43 * This code and its internal interfaces are subject to change or 44 * deletion without notice.</b> 45 */ 46 public class Scanner implements Lexer { 47 48 private static boolean scannerDebug = false; 49 50 /** A factory for creating scanners. */ 51 public static class Factory { 52 /** The context key for the scanner factory. */ 53 public static final Context.Key<Scanner.Factory> scannerFactoryKey = 54 new Context.Key<Scanner.Factory>(); 55 56 /** Get the Factory instance for this context. */ 57 public static Factory instance(Context context) { 58 Factory instance = context.get(scannerFactoryKey); 59 if (instance == null) 60 instance = new Factory(context); 61 return instance; 62 } 63 64 final Log log; 65 final Names names; 66 final Source source; 67 final Keywords keywords; 68 69 /** Create a new scanner factory. */ 70 protected Factory(Context context) { 71 context.put(scannerFactoryKey, this); 72 this.log = Log.instance(context); 73 this.names = Names.instance(context); 74 this.source = Source.instance(context); 75 this.keywords = Keywords.instance(context); 76 } 77 78 public Scanner newScanner(CharSequence input) { 79 if (input instanceof CharBuffer) { 80 return new Scanner(this, (CharBuffer)input); 81 } else { 82 char[] array = input.toString().toCharArray(); 83 return newScanner(array, array.length); 84 } 85 } 86 87 public Scanner newScanner(char[] input, int inputLength) { 88 return new Scanner(this, input, inputLength); 89 } 90 } 91 92 /* Output variables; set by nextToken(): 93 */ 94 95 /** The token, set by nextToken(). 96 */ 97 private Token token; 98 99 /** Allow hex floating-point literals. 100 */ 101 private boolean allowHexFloats; 102 103 /** Allow binary literals. 104 */ 105 private boolean allowBinaryLiterals; 106 107 /** Allow underscores in literals. 108 */ 109 private boolean allowUnderscoresInLiterals; 110 111 /** The source language setting. 112 */ 113 private Source source; 114 115 /** The token's position, 0-based offset from beginning of text. 116 */ 117 private int pos; 118 119 /** Character position just after the last character of the token. 120 */ 121 private int endPos; 122 123 /** The last character position of the previous token. 124 */ 125 private int prevEndPos; 126 127 /** The position where a lexical error occurred; 128 */ 129 private int errPos = Position.NOPOS; 130 131 /** The name of an identifier or token: 132 */ 133 private Name name; 134 135 /** The radix of a numeric literal token. 136 */ 137 private int radix; 138 139 /** Has a @deprecated been encountered in last doc comment? 140 * this needs to be reset by client. 141 */ 142 protected boolean deprecatedFlag = false; 143 144 /** A character buffer for literals. 145 */ 146 private char[] sbuf = new char[128]; 147 private int sp; 148 149 /** The input buffer, index of next chacter to be read, 150 * index of one past last character in buffer. 151 */ 152 private char[] buf; 153 private int bp; 154 private int buflen; 155 private int eofPos; 156 157 /** The current character. 158 */ 159 private char ch; 160 161 /** The buffer index of the last converted unicode character 162 */ 163 private int unicodeConversionBp = -1; 164 165 /** The log to be used for error reporting. 166 */ 167 private final Log log; 168 169 /** The name table. */ 170 private final Names names; 171 172 /** The keyword table. */ 173 private final Keywords keywords; 174 175 /** Common code for constructors. */ 176 private Scanner(Factory fac) { 177 log = fac.log; 178 names = fac.names; 179 keywords = fac.keywords; 180 source = fac.source; 181 allowBinaryLiterals = source.allowBinaryLiterals(); 182 allowHexFloats = source.allowHexFloats(); 183 allowUnderscoresInLiterals = source.allowBinaryLiterals(); 184 } 185 186 private static final boolean hexFloatsWork = hexFloatsWork(); 187 private static boolean hexFloatsWork() { 188 try { 189 Float.valueOf("0x1.0p1"); 190 return true; 191 } catch (NumberFormatException ex) { 192 return false; 193 } 194 } 195 196 /** Create a scanner from the input buffer. buffer must implement 197 * array() and compact(), and remaining() must be less than limit(). 198 */ 199 protected Scanner(Factory fac, CharBuffer buffer) { 200 this(fac, JavacFileManager.toArray(buffer), buffer.limit()); 201 } 202 203 /** 204 * Create a scanner from the input array. This method might 205 * modify the array. To avoid copying the input array, ensure 206 * that {@code inputLength < input.length} or 207 * {@code input[input.length -1]} is a white space character. 208 * 209 * @param fac the factory which created this Scanner 210 * @param input the input, might be modified 211 * @param inputLength the size of the input. 212 * Must be positive and less than or equal to input.length. 213 */ 214 protected Scanner(Factory fac, char[] input, int inputLength) { 215 this(fac); 216 eofPos = inputLength; 217 if (inputLength == input.length) { 218 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) { 219 inputLength--; 220 } else { 221 char[] newInput = new char[inputLength + 1]; 222 System.arraycopy(input, 0, newInput, 0, input.length); 223 input = newInput; 224 } 225 } 226 buf = input; 227 buflen = inputLength; 228 buf[buflen] = EOI; 229 bp = -1; 230 scanChar(); 231 } 232 233 /** Report an error at the given position using the provided arguments. 234 */ 235 private void lexError(int pos, String key, Object... args) { 236 log.error(pos, key, args); 237 token = ERROR; 238 errPos = pos; 239 } 240 241 /** Report an error at the current token position using the provided 242 * arguments. 243 */ 244 private void lexError(String key, Object... args) { 245 lexError(pos, key, args); 246 } 247 248 /** Convert an ASCII digit from its base (8, 10, or 16) 249 * to its value. 250 */ 251 private int digit(int base) { 252 char c = ch; 253 int result = Character.digit(c, base); 254 if (result >= 0 && c > 0x7f) { 255 lexError(pos+1, "illegal.nonascii.digit"); 256 ch = "0123456789abcdef".charAt(result); 257 } 258 return result; 259 } 260 261 /** Convert unicode escape; bp points to initial '\' character 262 * (Spec 3.3). 263 */ 264 private void convertUnicode() { 265 if (ch == '\\' && unicodeConversionBp != bp) { 266 bp++; ch = buf[bp]; 267 if (ch == 'u') { 268 do { 269 bp++; ch = buf[bp]; 270 } while (ch == 'u'); 271 int limit = bp + 3; 272 if (limit < buflen) { 273 int d = digit(16); 274 int code = d; 275 while (bp < limit && d >= 0) { 276 bp++; ch = buf[bp]; 277 d = digit(16); 278 code = (code << 4) + d; 279 } 280 if (d >= 0) { 281 ch = (char)code; 282 unicodeConversionBp = bp; 283 return; 284 } 285 } 286 lexError(bp, "illegal.unicode.esc"); 287 } else { 288 bp--; 289 ch = '\\'; 290 } 291 } 292 } 293 294 /** Read next character. 295 */ 296 private void scanChar() { 297 ch = buf[++bp]; 298 if (ch == '\\') { 299 convertUnicode(); 300 } 301 } 302 303 /** Read next character in comment, skipping over double '\' characters. 304 */ 305 private void scanCommentChar() { 306 scanChar(); 307 if (ch == '\\') { 308 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { 309 bp++; 310 } else { 311 convertUnicode(); 312 } 313 } 314 } 315 316 /** Append a character to sbuf. 317 */ 318 private void putChar(char ch) { 319 if (sp == sbuf.length) { 320 char[] newsbuf = new char[sbuf.length * 2]; 321 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length); 322 sbuf = newsbuf; 323 } 324 sbuf[sp++] = ch; 325 } 326 327 /** For debugging purposes: print character. 328 */ 329 private void dch() { 330 System.err.print(ch); System.out.flush(); 331 } 332 333 /** Read next character in character or string literal and copy into sbuf. 334 */ 335 private void scanLitChar(boolean forBytecodeName) { 336 if (ch == '\\') { 337 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { 338 bp++; 339 putChar('\\'); 340 scanChar(); 341 } else { 342 scanChar(); 343 switch (ch) { 344 case '0': case '1': case '2': case '3': 345 case '4': case '5': case '6': case '7': 346 char leadch = ch; 347 int oct = digit(8); 348 scanChar(); 349 if ('0' <= ch && ch <= '7') { 350 oct = oct * 8 + digit(8); 351 scanChar(); 352 if (leadch <= '3' && '0' <= ch && ch <= '7') { 353 oct = oct * 8 + digit(8); 354 scanChar(); 355 } 356 } 357 putChar((char)oct); 358 break; 359 case 'b': 360 putChar('\b'); scanChar(); break; 361 case 't': 362 putChar('\t'); scanChar(); break; 363 case 'n': 364 putChar('\n'); scanChar(); break; 365 case 'f': 366 putChar('\f'); scanChar(); break; 367 case 'r': 368 putChar('\r'); scanChar(); break; 369 case '\'': 370 putChar('\''); scanChar(); break; 371 case '\"': 372 putChar('\"'); scanChar(); break; 373 case '\\': 374 putChar('\\'); scanChar(); break; 375 case '|': case ',': case '?': case '%': 376 case '^': case '_': case '{': case '}': 377 case '!': case '-': case '=': 378 if (forBytecodeName) { 379 // Accept escape sequences for dangerous bytecode chars. 380 // This is illegal in normal Java string or character literals. 381 // Note that the escape sequence itself is passed through. 382 putChar('\\'); putChar(ch); scanChar(); 383 } else { 384 lexError(bp, "illegal.esc.char"); 385 } 386 break; 387 default: 388 lexError(bp, "illegal.esc.char"); 389 } 390 } 391 } else if (bp != buflen) { 392 putChar(ch); scanChar(); 393 } 394 } 395 private void scanLitChar() { 396 scanLitChar(false); 397 } 398 399 /** Read next character in an exotic name #"foo" 400 */ 401 private void scanBytecodeNameChar() { 402 switch (ch) { 403 // reject any "dangerous" char which is illegal somewhere in the JVM spec 404 // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm 405 case '/': case '.': case ';': // illegal everywhere 406 case '<': case '>': // illegal in methods, dangerous in classes 407 case '[': // illegal in classes 408 lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch)); 409 break; 410 } 411 scanLitChar(true); 412 } 413 414 private void scanDigits(int digitRadix) { 415 char saveCh; 416 int savePos; 417 do { 418 if (ch != '_') { 419 putChar(ch); 420 } else { 421 if (!allowUnderscoresInLiterals) { 422 lexError("unsupported.underscore", source.name); 423 allowUnderscoresInLiterals = true; 424 } 425 } 426 saveCh = ch; 427 savePos = bp; 428 scanChar(); 429 } while (digit(digitRadix) >= 0 || ch == '_'); 430 if (saveCh == '_') 431 lexError(savePos, "illegal.underscore"); 432 } 433 434 /** Read fractional part of hexadecimal floating point number. 435 */ 436 private void scanHexExponentAndSuffix() { 437 if (ch == 'p' || ch == 'P') { 438 putChar(ch); 439 scanChar(); 440 skipIllegalUnderscores(); 441 if (ch == '+' || ch == '-') { 442 putChar(ch); 443 scanChar(); 444 } 445 skipIllegalUnderscores(); 446 if ('0' <= ch && ch <= '9') { 447 scanDigits(10); 448 if (!allowHexFloats) { 449 lexError("unsupported.fp.lit", source.name); 450 allowHexFloats = true; 451 } 452 else if (!hexFloatsWork) 453 lexError("unsupported.cross.fp.lit"); 454 } else 455 lexError("malformed.fp.lit"); 456 } else { 457 lexError("malformed.fp.lit"); 458 } 459 if (ch == 'f' || ch == 'F') { 460 putChar(ch); 461 scanChar(); 462 token = FLOATLITERAL; 463 } else { 464 if (ch == 'd' || ch == 'D') { 465 putChar(ch); 466 scanChar(); 467 } 468 token = DOUBLELITERAL; 469 } 470 } 471 472 /** Read fractional part of floating point number. 473 */ 474 private void scanFraction() { 475 skipIllegalUnderscores(); 476 if ('0' <= ch && ch <= '9') { 477 scanDigits(10); 478 } 479 int sp1 = sp; 480 if (ch == 'e' || ch == 'E') { 481 putChar(ch); 482 scanChar(); 483 skipIllegalUnderscores(); 484 if (ch == '+' || ch == '-') { 485 putChar(ch); 486 scanChar(); 487 } 488 skipIllegalUnderscores(); 489 if ('0' <= ch && ch <= '9') { 490 scanDigits(10); 491 return; 492 } 493 lexError("malformed.fp.lit"); 494 sp = sp1; 495 } 496 } 497 498 /** Read fractional part and 'd' or 'f' suffix of floating point number. 499 */ 500 private void scanFractionAndSuffix() { 501 this.radix = 10; 502 scanFraction(); 503 if (ch == 'f' || ch == 'F') { 504 putChar(ch); 505 scanChar(); 506 token = FLOATLITERAL; 507 } else { 508 if (ch == 'd' || ch == 'D') { 509 putChar(ch); 510 scanChar(); 511 } 512 token = DOUBLELITERAL; 513 } 514 } 515 516 /** Read fractional part and 'd' or 'f' suffix of floating point number. 517 */ 518 private void scanHexFractionAndSuffix(boolean seendigit) { 519 this.radix = 16; 520 assert ch == '.'; 521 putChar(ch); 522 scanChar(); 523 skipIllegalUnderscores(); 524 if (digit(16) >= 0) { 525 seendigit = true; 526 scanDigits(16); 527 } 528 if (!seendigit) 529 lexError("invalid.hex.number"); 530 else 531 scanHexExponentAndSuffix(); 532 } 533 534 private void skipIllegalUnderscores() { 535 if (ch == '_') { 536 lexError(bp, "illegal.underscore"); 537 while (ch == '_') 538 scanChar(); 539 } 540 } 541 542 /** Read a number. 543 * @param radix The radix of the number; one of 2, j8, 10, 16. 544 */ 545 private void scanNumber(int radix) { 546 this.radix = radix; 547 // for octal, allow base-10 digit in case it's a float literal 548 int digitRadix = (radix == 8 ? 10 : radix); 549 boolean seendigit = false; 550 if (digit(digitRadix) >= 0) { 551 seendigit = true; 552 scanDigits(digitRadix); 553 } 554 if (radix == 16 && ch == '.') { 555 scanHexFractionAndSuffix(seendigit); 556 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) { 557 scanHexExponentAndSuffix(); 558 } else if (digitRadix == 10 && ch == '.') { 559 putChar(ch); 560 scanChar(); 561 scanFractionAndSuffix(); 562 } else if (digitRadix == 10 && 563 (ch == 'e' || ch == 'E' || 564 ch == 'f' || ch == 'F' || 565 ch == 'd' || ch == 'D')) { 566 scanFractionAndSuffix(); 567 } else { 568 if (ch == 'l' || ch == 'L') { 569 scanChar(); 570 token = LONGLITERAL; 571 } else { 572 token = INTLITERAL; 573 } 574 } 575 } 576 577 /** Read an identifier. 578 */ 579 private void scanIdent() { 580 boolean isJavaIdentifierPart; 581 char high; 582 do { 583 if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch; 584 // optimization, was: putChar(ch); 585 586 scanChar(); 587 switch (ch) { 588 case 'A': case 'B': case 'C': case 'D': case 'E': 589 case 'F': case 'G': case 'H': case 'I': case 'J': 590 case 'K': case 'L': case 'M': case 'N': case 'O': 591 case 'P': case 'Q': case 'R': case 'S': case 'T': 592 case 'U': case 'V': case 'W': case 'X': case 'Y': 593 case 'Z': 594 case 'a': case 'b': case 'c': case 'd': case 'e': 595 case 'f': case 'g': case 'h': case 'i': case 'j': 596 case 'k': case 'l': case 'm': case 'n': case 'o': 597 case 'p': case 'q': case 'r': case 's': case 't': 598 case 'u': case 'v': case 'w': case 'x': case 'y': 599 case 'z': 600 case '$': case '_': 601 case '0': case '1': case '2': case '3': case '4': 602 case '5': case '6': case '7': case '8': case '9': 603 case '\u0000': case '\u0001': case '\u0002': case '\u0003': 604 case '\u0004': case '\u0005': case '\u0006': case '\u0007': 605 case '\u0008': case '\u000E': case '\u000F': case '\u0010': 606 case '\u0011': case '\u0012': case '\u0013': case '\u0014': 607 case '\u0015': case '\u0016': case '\u0017': 608 case '\u0018': case '\u0019': case '\u001B': 609 case '\u007F': 610 break; 611 case '\u001A': // EOI is also a legal identifier part 612 if (bp >= buflen) { 613 name = names.fromChars(sbuf, 0, sp); 614 token = keywords.key(name); 615 return; 616 } 617 break; 618 default: 619 if (ch < '\u0080') { 620 // all ASCII range chars already handled, above 621 isJavaIdentifierPart = false; 622 } else { 623 high = scanSurrogates(); 624 if (high != 0) { 625 if (sp == sbuf.length) { 626 putChar(high); 627 } else { 628 sbuf[sp++] = high; 629 } 630 isJavaIdentifierPart = Character.isJavaIdentifierPart( 631 Character.toCodePoint(high, ch)); 632 } else { 633 isJavaIdentifierPart = Character.isJavaIdentifierPart(ch); 634 } 635 } 636 if (!isJavaIdentifierPart) { 637 name = names.fromChars(sbuf, 0, sp); 638 token = keywords.key(name); 639 return; 640 } 641 } 642 } while (true); 643 } 644 645 /** Are surrogates supported? 646 */ 647 final static boolean surrogatesSupported = surrogatesSupported(); 648 private static boolean surrogatesSupported() { 649 try { 650 Character.isHighSurrogate('a'); 651 return true; 652 } catch (NoSuchMethodError ex) { 653 return false; 654 } 655 } 656 657 /** Scan surrogate pairs. If 'ch' is a high surrogate and 658 * the next character is a low surrogate, then put the low 659 * surrogate in 'ch', and return the high surrogate. 660 * otherwise, just return 0. 661 */ 662 private char scanSurrogates() { 663 if (surrogatesSupported && Character.isHighSurrogate(ch)) { 664 char high = ch; 665 666 scanChar(); 667 668 if (Character.isLowSurrogate(ch)) { 669 return high; 670 } 671 672 ch = high; 673 } 674 675 return 0; 676 } 677 678 /** Return true if ch can be part of an operator. 679 */ 680 private boolean isSpecial(char ch) { 681 switch (ch) { 682 case '!': case '%': case '&': case '*': case '?': 683 case '+': case '-': case ':': case '<': case '=': 684 case '>': case '^': case '|': case '~': 685 case '@': 686 return true; 687 default: 688 return false; 689 } 690 } 691 692 /** Read longest possible sequence of special characters and convert 693 * to token. 694 */ 695 private void scanOperator() { 696 while (true) { 697 putChar(ch); 698 Name newname = names.fromChars(sbuf, 0, sp); 699 if (keywords.key(newname) == IDENTIFIER) { 700 sp--; 701 break; 702 } 703 name = newname; 704 token = keywords.key(newname); 705 scanChar(); 706 if (!isSpecial(ch)) break; 707 } 708 } 709 710 /** 711 * Scan a documention comment; determine if a deprecated tag is present. 712 * Called once the initial /, * have been skipped, positioned at the second * 713 * (which is treated as the beginning of the first line). 714 * Stops positioned at the closing '/'. 715 */ 716 @SuppressWarnings("fallthrough") 717 private void scanDocComment() { 718 boolean deprecatedPrefix = false; 719 720 forEachLine: 721 while (bp < buflen) { 722 723 // Skip optional WhiteSpace at beginning of line 724 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) { 725 scanCommentChar(); 726 } 727 728 // Skip optional consecutive Stars 729 while (bp < buflen && ch == '*') { 730 scanCommentChar(); 731 if (ch == '/') { 732 return; 733 } 734 } 735 736 // Skip optional WhiteSpace after Stars 737 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) { 738 scanCommentChar(); 739 } 740 741 deprecatedPrefix = false; 742 // At beginning of line in the JavaDoc sense. 743 if (bp < buflen && ch == '@' && !deprecatedFlag) { 744 scanCommentChar(); 745 if (bp < buflen && ch == 'd') { 746 scanCommentChar(); 747 if (bp < buflen && ch == 'e') { 748 scanCommentChar(); 749 if (bp < buflen && ch == 'p') { 750 scanCommentChar(); 751 if (bp < buflen && ch == 'r') { 752 scanCommentChar(); 753 if (bp < buflen && ch == 'e') { 754 scanCommentChar(); 755 if (bp < buflen && ch == 'c') { 756 scanCommentChar(); 757 if (bp < buflen && ch == 'a') { 758 scanCommentChar(); 759 if (bp < buflen && ch == 't') { 760 scanCommentChar(); 761 if (bp < buflen && ch == 'e') { 762 scanCommentChar(); 763 if (bp < buflen && ch == 'd') { 764 deprecatedPrefix = true; 765 scanCommentChar(); 766 }}}}}}}}}}} 767 if (deprecatedPrefix && bp < buflen) { 768 if (Character.isWhitespace(ch)) { 769 deprecatedFlag = true; 770 } else if (ch == '*') { 771 scanCommentChar(); 772 if (ch == '/') { 773 deprecatedFlag = true; 774 return; 775 } 776 } 777 } 778 779 // Skip rest of line 780 while (bp < buflen) { 781 switch (ch) { 782 case '*': 783 scanCommentChar(); 784 if (ch == '/') { 785 return; 786 } 787 break; 788 case CR: // (Spec 3.4) 789 scanCommentChar(); 790 if (ch != LF) { 791 continue forEachLine; 792 } 793 /* fall through to LF case */ 794 case LF: // (Spec 3.4) 795 scanCommentChar(); 796 continue forEachLine; 797 default: 798 scanCommentChar(); 799 } 800 } // rest of line 801 } // forEachLine 802 return; 803 } 804 805 /** The value of a literal token, recorded as a string. 806 * For integers, leading 0x and 'l' suffixes are suppressed. 807 */ 808 public String stringVal() { 809 return new String(sbuf, 0, sp); 810 } 811 812 /** Read token. 813 */ 814 public void nextToken() { 815 816 try { 817 prevEndPos = endPos; 818 sp = 0; 819 820 while (true) { 821 pos = bp; 822 switch (ch) { 823 case ' ': // (Spec 3.6) 824 case '\t': // (Spec 3.6) 825 case FF: // (Spec 3.6) 826 do { 827 scanChar(); 828 } while (ch == ' ' || ch == '\t' || ch == FF); 829 endPos = bp; 830 processWhiteSpace(); 831 break; 832 case LF: // (Spec 3.4) 833 scanChar(); 834 endPos = bp; 835 processLineTerminator(); 836 break; 837 case CR: // (Spec 3.4) 838 scanChar(); 839 if (ch == LF) { 840 scanChar(); 841 } 842 endPos = bp; 843 processLineTerminator(); 844 break; 845 case 'A': case 'B': case 'C': case 'D': case 'E': 846 case 'F': case 'G': case 'H': case 'I': case 'J': 847 case 'K': case 'L': case 'M': case 'N': case 'O': 848 case 'P': case 'Q': case 'R': case 'S': case 'T': 849 case 'U': case 'V': case 'W': case 'X': case 'Y': 850 case 'Z': 851 case 'a': case 'b': case 'c': case 'd': case 'e': 852 case 'f': case 'g': case 'h': case 'i': case 'j': 853 case 'k': case 'l': case 'm': case 'n': case 'o': 854 case 'p': case 'q': case 'r': case 's': case 't': 855 case 'u': case 'v': case 'w': case 'x': case 'y': 856 case 'z': 857 case '$': case '_': 858 scanIdent(); 859 return; 860 case '0': 861 scanChar(); 862 if (ch == 'x' || ch == 'X') { 863 scanChar(); 864 skipIllegalUnderscores(); 865 if (ch == '.') { 866 scanHexFractionAndSuffix(false); 867 } else if (digit(16) < 0) { 868 lexError("invalid.hex.number"); 869 } else { 870 scanNumber(16); 871 } 872 } else if (ch == 'b' || ch == 'B') { 873 if (!allowBinaryLiterals) { 874 lexError("unsupported.binary.lit", source.name); 875 allowBinaryLiterals = true; 876 } 877 scanChar(); 878 skipIllegalUnderscores(); 879 if (digit(2) < 0) { 880 lexError("invalid.binary.number"); 881 } else { 882 scanNumber(2); 883 } 884 } else { 885 putChar('0'); 886 if (ch == '_') { 887 int savePos = bp; 888 do { 889 scanChar(); 890 } while (ch == '_'); 891 if (digit(10) < 0) { 892 lexError(savePos, "illegal.underscore"); 893 } 894 } 895 scanNumber(8); 896 } 897 return; 898 case '1': case '2': case '3': case '4': 899 case '5': case '6': case '7': case '8': case '9': 900 scanNumber(10); 901 return; 902 case '.': 903 scanChar(); 904 if ('0' <= ch && ch <= '9') { 905 putChar('.'); 906 scanFractionAndSuffix(); 907 } else if (ch == '.') { 908 putChar('.'); putChar('.'); 909 scanChar(); 910 if (ch == '.') { 911 scanChar(); 912 putChar('.'); 913 token = ELLIPSIS; 914 } else { 915 lexError("malformed.fp.lit"); 916 } 917 } else { 918 token = DOT; 919 } 920 return; 921 case ',': 922 scanChar(); token = COMMA; return; 923 case ';': 924 scanChar(); token = SEMI; return; 925 case '(': 926 scanChar(); token = LPAREN; return; 927 case ')': 928 scanChar(); token = RPAREN; return; 929 case '[': 930 scanChar(); token = LBRACKET; return; 931 case ']': 932 scanChar(); token = RBRACKET; return; 933 case '{': 934 scanChar(); token = LBRACE; return; 935 case '}': 936 scanChar(); token = RBRACE; return; 937 case '/': 938 scanChar(); 939 if (ch == '/') { 940 do { 941 scanCommentChar(); 942 } while (ch != CR && ch != LF && bp < buflen); 943 if (bp < buflen) { 944 endPos = bp; 945 processComment(CommentStyle.LINE); 946 } 947 break; 948 } else if (ch == '*') { 949 scanChar(); 950 CommentStyle style; 951 if (ch == '*') { 952 style = CommentStyle.JAVADOC; 953 scanDocComment(); 954 } else { 955 style = CommentStyle.BLOCK; 956 while (bp < buflen) { 957 if (ch == '*') { 958 scanChar(); 959 if (ch == '/') break; 960 } else { 961 scanCommentChar(); 962 } 963 } 964 } 965 if (ch == '/') { 966 scanChar(); 967 endPos = bp; 968 processComment(style); 969 break; 970 } else { 971 lexError("unclosed.comment"); 972 return; 973 } 974 } else if (ch == '=') { 975 name = names.slashequals; 976 token = SLASHEQ; 977 scanChar(); 978 } else { 979 name = names.slash; 980 token = SLASH; 981 } 982 return; 983 case '\'': 984 scanChar(); 985 if (ch == '\'') { 986 lexError("empty.char.lit"); 987 } else { 988 if (ch == CR || ch == LF) 989 lexError(pos, "illegal.line.end.in.char.lit"); 990 scanLitChar(); 991 if (ch == '\'') { 992 scanChar(); 993 token = CHARLITERAL; 994 } else { 995 lexError(pos, "unclosed.char.lit"); 996 } 997 } 998 return; 999 case '\"': 1000 scanChar(); 1001 while (ch != '\"' && ch != CR && ch != LF && bp < buflen) 1002 scanLitChar(); 1003 if (ch == '\"') { 1004 token = STRINGLITERAL; 1005 scanChar(); 1006 } else { 1007 lexError(pos, "unclosed.str.lit"); 1008 } 1009 return; 1010 case '#': 1011 scanChar(); 1012 if (ch == '\"') { 1013 scanChar(); 1014 if (ch == '\"') 1015 lexError(pos, "empty.bytecode.ident"); 1016 while (ch != '\"' && ch != CR && ch != LF && bp < buflen) { 1017 scanBytecodeNameChar(); 1018 } 1019 if (ch == '\"') { 1020 name = names.fromChars(sbuf, 0, sp); 1021 token = IDENTIFIER; // even if #"int" or #"do" 1022 scanChar(); 1023 } else { 1024 lexError(pos, "unclosed.bytecode.ident"); 1025 } 1026 } else { 1027 lexError("illegal.char", String.valueOf((int)'#')); 1028 } 1029 return; 1030 default: 1031 if (isSpecial(ch)) { 1032 scanOperator(); 1033 } else { 1034 boolean isJavaIdentifierStart; 1035 if (ch < '\u0080') { 1036 // all ASCII range chars already handled, above 1037 isJavaIdentifierStart = false; 1038 } else { 1039 char high = scanSurrogates(); 1040 if (high != 0) { 1041 if (sp == sbuf.length) { 1042 putChar(high); 1043 } else { 1044 sbuf[sp++] = high; 1045 } 1046 1047 isJavaIdentifierStart = Character.isJavaIdentifierStart( 1048 Character.toCodePoint(high, ch)); 1049 } else { 1050 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch); 1051 } 1052 } 1053 if (isJavaIdentifierStart) { 1054 scanIdent(); 1055 } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5 1056 token = EOF; 1057 pos = bp = eofPos; 1058 } else { 1059 lexError("illegal.char", String.valueOf((int)ch)); 1060 scanChar(); 1061 } 1062 } 1063 return; 1064 } 1065 } 1066 } finally { 1067 endPos = bp; 1068 if (scannerDebug) 1069 System.out.println("nextToken(" + pos 1070 + "," + endPos + ")=|" + 1071 new String(getRawCharacters(pos, endPos)) 1072 + "|"); 1073 } 1074 } 1075 1076 /** Return the current token, set by nextToken(). 1077 */ 1078 public Token token() { 1079 return token; 1080 } 1081 1082 /** Sets the current token. 1083 */ 1084 public void token(Token token) { 1085 this.token = token; 1086 } 1087 1088 /** Return the current token's position: a 0-based 1089 * offset from beginning of the raw input stream 1090 * (before unicode translation) 1091 */ 1092 public int pos() { 1093 return pos; 1094 } 1095 1096 /** Return the last character position of the current token. 1097 */ 1098 public int endPos() { 1099 return endPos; 1100 } 1101 1102 /** Return the last character position of the previous token. 1103 */ 1104 public int prevEndPos() { 1105 return prevEndPos; 1106 } 1107 1108 /** Return the position where a lexical error occurred; 1109 */ 1110 public int errPos() { 1111 return errPos; 1112 } 1113 1114 /** Set the position where a lexical error occurred; 1115 */ 1116 public void errPos(int pos) { 1117 errPos = pos; 1118 } 1119 1120 /** Return the name of an identifier or token for the current token. 1121 */ 1122 public Name name() { 1123 return name; 1124 } 1125 1126 /** Return the radix of a numeric literal token. 1127 */ 1128 public int radix() { 1129 return radix; 1130 } 1131 1132 /** Has a @deprecated been encountered in last doc comment? 1133 * This needs to be reset by client with resetDeprecatedFlag. 1134 */ 1135 public boolean deprecatedFlag() { 1136 return deprecatedFlag; 1137 } 1138 1139 public void resetDeprecatedFlag() { 1140 deprecatedFlag = false; 1141 } 1142 1143 /** 1144 * Returns the documentation string of the current token. 1145 */ 1146 public String docComment() { 1147 return null; 1148 } 1149 1150 /** 1151 * Returns a copy of the input buffer, up to its inputLength. 1152 * Unicode escape sequences are not translated. 1153 */ 1154 public char[] getRawCharacters() { 1155 char[] chars = new char[buflen]; 1156 System.arraycopy(buf, 0, chars, 0, buflen); 1157 return chars; 1158 } 1159 1160 /** 1161 * Returns a copy of a character array subset of the input buffer. 1162 * The returned array begins at the <code>beginIndex</code> and 1163 * extends to the character at index <code>endIndex - 1</code>. 1164 * Thus the length of the substring is <code>endIndex-beginIndex</code>. 1165 * This behavior is like 1166 * <code>String.substring(beginIndex, endIndex)</code>. 1167 * Unicode escape sequences are not translated. 1168 * 1169 * @param beginIndex the beginning index, inclusive. 1170 * @param endIndex the ending index, exclusive. 1171 * @throws IndexOutOfBounds if either offset is outside of the 1172 * array bounds 1173 */ 1174 public char[] getRawCharacters(int beginIndex, int endIndex) { 1175 int length = endIndex - beginIndex; 1176 char[] chars = new char[length]; 1177 System.arraycopy(buf, beginIndex, chars, 0, length); 1178 return chars; 1179 } 1180 1181 public enum CommentStyle { 1182 LINE, 1183 BLOCK, 1184 JAVADOC, 1185 } 1186 1187 /** 1188 * Called when a complete comment has been scanned. pos and endPos 1189 * will mark the comment boundary. 1190 */ 1191 protected void processComment(CommentStyle style) { 1192 if (scannerDebug) 1193 System.out.println("processComment(" + pos 1194 + "," + endPos + "," + style + ")=|" 1195 + new String(getRawCharacters(pos, endPos)) 1196 + "|"); 1197 } 1198 1199 /** 1200 * Called when a complete whitespace run has been scanned. pos and endPos 1201 * will mark the whitespace boundary. 1202 */ 1203 protected void processWhiteSpace() { 1204 if (scannerDebug) 1205 System.out.println("processWhitespace(" + pos 1206 + "," + endPos + ")=|" + 1207 new String(getRawCharacters(pos, endPos)) 1208 + "|"); 1209 } 1210 1211 /** 1212 * Called when a line terminator has been processed. 1213 */ 1214 protected void processLineTerminator() { 1215 if (scannerDebug) 1216 System.out.println("processTerminator(" + pos 1217 + "," + endPos + ")=|" + 1218 new String(getRawCharacters(pos, endPos)) 1219 + "|"); 1220 } 1221 1222 /** Build a map for translating between line numbers and 1223 * positions in the input. 1224 * 1225 * @return a LineMap */ 1226 public Position.LineMap getLineMap() { 1227 return Position.makeLineMap(buf, buflen, false); 1228 } 1229 1230 }