/*
 * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.tools.java;

import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;

/**
 * A Scanner for Java tokens. Errors are reported
 * to the environment object.<p>
 *
 * The scanner keeps track of the current token,
 * the value of the current token (if any), and the start
 * position of the current token.<p>
 *
 * The scan() method advances the scanner to the next
 * token in the input.<p>
 *
 * The match() method is used to quickly match opening
 * brackets (ie: '(', '{', or '[') with their closing
 * counterpart. This is useful during error recovery.<p>
 *
 * A position consists of: ((linenr << WHEREOFFSETBITS) | offset)
 * this means that both the line number and the exact offset into
 * the file are encoded in each position value.<p>
 *
 * The compiler treats either "\n", "\r" or "\r\n" as the
 * end of a line.<p>
 *
 * WARNING: The contents of this source file are not part of any
 * supported API.  Code that depends on them does so at its own risk:
 * they are subject to change or removal without notice.
 *
 * @author      Arthur van Hoff
 */

public
class Scanner implements Constants {
    /**
     * The increment for each character.
     */
    public static final long OFFSETINC = 1;

    /**
     * The increment for each line.
     */
    public static final long LINEINC = 1L << WHEREOFFSETBITS;

    /**
     * End of input
     */
    public static final int EOF = -1;

    /**
     * Where errors are reported
     */
    public Environment env;

    /**
     * Input reader
     */
    protected ScannerInputReader in;

    /**
     * If true, present all comments as tokens.
     * Contents are not saved, but positions are recorded accurately,
     * so the comment can be recovered from the text.
     * Line terminations are also returned as comment tokens,
     * and may be distinguished by their start and end positions,
     * which are equal (meaning, these tokens contain no chars).
     */
    public boolean scanComments = false;

    /**
     * Current token
     */
    public int token;

    /**
     * The position of the current token
     */
    public long pos;

    /**
     * The position of the previous token
     */
    public long prevPos;

    /**
     * The current character
     */
    protected int ch;

    /*
     * Token values.
     */
    public char charValue;
    public int intValue;
    public long longValue;
    public float floatValue;
    public double doubleValue;
    public String stringValue;
    public Identifier idValue;
    public int radix;   // Radix, when reading int or long

    /*
     * A doc comment preceding the most recent token
     */
    public String docComment;

    /*
     * A growable character buffer.
     */
    private int count;
    private char[] buffer = new char[1024];

    /**
     * Double the capacity of the character buffer, keeping its contents.
     */
    private void growBuffer() {
        char[] newBuffer = new char[buffer.length * 2];
        System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
        buffer = newBuffer;
    }

    // The following two methods have been hand-inlined in
    // scanDocComment.  If you make changes here, you should
    // check to see if scanDocComment also needs modification.

    /**
     * Append a character to the buffer, growing the buffer when full.
     */
    private void putc(int ch) {
        if (count == buffer.length) {
            growBuffer();
        }
        buffer[count++] = (char)ch;
    }

    /**
     * Return the first {@code count} characters of the buffer as a String.
     */
    private String bufferString() {
        return new String(buffer, 0, count);
    }

    /**
     * Create a scanner to scan an input stream.
     * The first token is scanned before the constructor returns.
     */
    public Scanner(Environment env, InputStream in) throws IOException {
        this.env = env;
        useInputStream(in);
    }

    /**
     * Setup input from the given input stream,
     * and scan the first token from it.
     * If creating the reader fails, the environment's character
     * encoding is reset to null and the reader is created again.
     */
    protected void useInputStream(InputStream in) throws IOException {
        try {
            this.in = new ScannerInputReader(env, in);
        } catch (Exception e) {
            // Retry once with the character encoding cleared.
            env.setCharacterEncoding(null);
            this.in = new ScannerInputReader(env, in);
        }

        ch = this.in.read();
        prevPos = this.in.pos;

        scan();
    }

    /**
     * Create a scanner without attaching any input to it yet.
     */
    protected Scanner(Environment env) {
        this.env = env;
        // Expect the subclass to call useInputStream at the right time.
    }

    /**
     * Define a keyword: the identifier spelled opNames[val]
     * is given the token type val.
     */
    private static void defineKeyword(int val) {
        Identifier.lookup(opNames[val]).setType(val);
    }

    /**
     * Register the token type of every keyword.
     */
    static {
        // Statement keywords
        defineKeyword(FOR);
        defineKeyword(IF);
        defineKeyword(ELSE);
        defineKeyword(WHILE);
        defineKeyword(DO);
        defineKeyword(SWITCH);
        defineKeyword(CASE);
        defineKeyword(DEFAULT);
        defineKeyword(BREAK);
        defineKeyword(CONTINUE);
        defineKeyword(RETURN);
        defineKeyword(TRY);
        defineKeyword(CATCH);
        defineKeyword(FINALLY);
        defineKeyword(THROW);

        // Type defineKeywords
        defineKeyword(BYTE);
        defineKeyword(CHAR);
        defineKeyword(SHORT);
        defineKeyword(INT);
        defineKeyword(LONG);
        defineKeyword(FLOAT);
        defineKeyword(DOUBLE);
        defineKeyword(VOID);
        defineKeyword(BOOLEAN);

        // Expression keywords
        defineKeyword(INSTANCEOF);
        defineKeyword(TRUE);
        defineKeyword(FALSE);
        defineKeyword(NEW);
        defineKeyword(THIS);
        defineKeyword(SUPER);
        defineKeyword(NULL);

        // Declaration keywords
        defineKeyword(IMPORT);
        defineKeyword(CLASS);
        defineKeyword(EXTENDS);
        defineKeyword(IMPLEMENTS);
        defineKeyword(INTERFACE);
        defineKeyword(PACKAGE);
        defineKeyword(THROWS);

        // Modifier keywords
        defineKeyword(PRIVATE);
        defineKeyword(PUBLIC);
        defineKeyword(PROTECTED);
        defineKeyword(STATIC);
        defineKeyword(TRANSIENT);
        defineKeyword(SYNCHRONIZED);
        defineKeyword(NATIVE);
        defineKeyword(ABSTRACT);
        defineKeyword(VOLATILE);
        defineKeyword(FINAL);
        defineKeyword(STRICTFP);

        // reserved keywords
        defineKeyword(CONST);
        defineKeyword(GOTO);
    }

    /**
     * Scan a comment. This method should be
     * called once the initial /, * and the next
     * character have been read.
     * On return, ch is the character following the closing
     * slash, or EOF if the comment was not terminated (in which
     * case "eof.in.comment" is reported).
     */
    private void skipComment() throws IOException {
        while (true) {
            switch (ch) {
              case EOF:
                env.error(pos, "eof.in.comment");
                return;

              case '*':
                if ((ch = in.read()) == '/')  {
                    ch = in.read();
                    return;
                }
                break;

              default:
                ch = in.read();
                break;
            }
        }
    }

    /**
     * Scan a doc comment. This method should be called
     * once the initial /, * and * have been read. It gathers
     * the content of the comment (without leading spaces and '*'s)
     * in the string buffer.
     *
     * @return the text of the comment with trailing spaces, tabs
     *         and stars removed; "" if there is no text.
     */
    private String scanDocComment() throws IOException {
        // Note: this method has been hand-optimized to yield
        // better performance.  This was done after it was noted
        // that javadoc spent a great deal of its time here.
        // This should also help the performance of the compiler
        // as well -- it scans the doc comments to find
        // @deprecated tags.
        //
        // The logic of the method has been completely rewritten
        // to avoid the use of flags that need to be looked at
        // for every character read.  Members that are accessed
        // more than once have been stored in local variables.
        // The methods putc() and bufferString() have been
        // inlined by hand.  Extra cases have been added to
        // switch statements to trick the compiler into generating
        // a tableswitch instead of a lookupswitch.
        //
        // This implementation aims to preserve the previous
        // behavior of this method.

        int c;

        // Put `in' in a local variable.
        final ScannerInputReader in = this.in;

        // We maintain the buffer locally rather than calling putc().
        char[] buffer = this.buffer;
        int count = 0;

        // We are called pointing at the second star of the doc
        // comment:
        //
        // Input: /** the rest of the comment ... */
        //          ^
        //
        // We rely on this in the code below.

        // Consume any number of stars.
        while ((c = in.read()) == '*')
            ;

        // Is the comment of the form /**/, /***/, /****/, etc.?
        if (c == '/') {
            // Set ch and return
            ch = in.read();
            return "";
        }

        // Skip a newline on the first line of the comment.
        if (c == '\n') {
            c = in.read();
        }

    outerLoop:
        // The outerLoop processes the doc comment, looping once
        // for each line.  For each line, it first strips off
        // whitespace, then it consumes any stars, then it
        // puts the rest of the line into our buffer.
        while (true) {

            // The wsLoop consumes whitespace from the beginning
            // of each line.
        wsLoop:
            while (true) {
                switch (c) {
                  case ' ':
                  case '\t':
                    // We could check for other forms of whitespace
                    // as well, but this is left as is for minimum
                    // disturbance of functionality.
                    //
                    // Just skip whitespace.
                    c = in.read();
                    break;

                  // We have added extra cases here to trick the
                  // compiler into using a tableswitch instead of
                  // a lookupswitch.  They can be removed without
                  // a change in meaning.
                  case 10: case 11: case 12: case 13: case 14: case 15:
                  case 16: case 17: case 18: case 19: case 20: case 21:
                  case 22: case 23: case 24: case 25: case 26: case 27:
                  case 28: case 29: case 30: case 31:
                  default:
                    // We've seen something that isn't whitespace,
                    // jump out.
                    break wsLoop;
                }
            } // end wsLoop.

            // Are there stars here?  If so, consume them all
            // and check for the end of comment.
            if (c == '*') {
                // Skip all of the stars...
                do {
                    c = in.read();
                } while (c == '*');

                // ...then check for the closing slash.
                if (c == '/') {
                    // We're done with the doc comment.
                    // Set ch and break out.
                    ch = in.read();
                    break outerLoop;
                }
            }

            // The textLoop processes the rest of the characters
            // on the line, adding them to our buffer.
        textLoop:
            while (true) {
                switch (c) {
                  case EOF:
                    // We've seen a premature EOF.  Break out
                    // of the loop.
                    env.error(pos, "eof.in.comment");
                    ch = EOF;
                    break outerLoop;

                  case '*':
                    // Is this just a star?  Or is this the
                    // end of a comment?
                    c = in.read();
                    if (c == '/') {
                        // This is the end of the comment,
                        // set ch and return our buffer.
                        ch = in.read();
                        break outerLoop;
                    }
                    // This is just an ordinary star.  Add it to
                    // the buffer.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = '*';
                    break;

                  case '\n':
                    // We've seen a newline.  Add it to our
                    // buffer and break out of this loop,
                    // starting fresh on a new line.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = '\n';
                    c = in.read();
                    break textLoop;

                  // Again, the extra cases here are a trick
                  // to get the compiler to generate a tableswitch.
                  case 0: case 1: case 2: case 3: case 4: case 5:
                  case 6: case 7: case 8: case 11: case 12: case 13:
                  case 14: case 15: case 16: case 17: case 18: case 19:
                  case 20: case 21: case 22: case 23: case 24: case 25:
                  case 26: case 27: case 28: case 29: case 30: case 31:
                  case 32: case 33: case 34: case 35: case 36: case 37:
                  case 38: case 39: case 40:
                  default:
                    // Add the character to our buffer.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = (char)c;
                    c = in.read();
                    break;
                }
            } // end textLoop
        } // end outerLoop

        // We have scanned our doc comment.  It is stored in
        // buffer.  The previous implementation of scanDocComment
        // stripped off all trailing spaces and stars from the comment.
        // We will do this as well, so as to cause a minimum of
        // disturbance.  Is this what we want?
        if (count > 0) {
            int i = count - 1;
        trailLoop:
            while (i > -1) {
                switch (buffer[i]) {
                  case ' ':
                  case '\t':
                  case '*':
                    i--;
                    break;
                  // And again, the extra cases here are a trick
                  // to get the compiler to generate a tableswitch.
                  case 0: case 1: case 2: case 3: case 4: case 5:
                  case 6: case 7: case 8: case 10: case 11: case 12:
                  case 13: case 14: case 15: case 16: case 17: case 18:
                  case 19: case 20: case 21: case 22: case 23: case 24:
                  case 25: case 26: case 27: case 28: case 29: case 30:
                  case 31: case 33: case 34: case 35: case 36: case 37:
                  case 38: case 39: case 40:
                  default:
                    break trailLoop;
                }
            }
            count = i + 1;

            // Return the text of the doc comment.
            return new String(buffer, 0, count);
        } else {
            return "";
        }
    }

    /**
     * Scan a number. The first digit of the number should be the current
     * character.  We may be scanning hex, decimal, or octal at this point.
     * Sets token to INTVAL or LONGVAL (with intValue/longValue and radix),
     * or hands over to scanReal() once the literal turns out to be
     * floating point.
     */
    @SuppressWarnings("fallthrough")
    private void scanNumber() throws IOException {
        boolean seenNonOctal = false;
        boolean overflow = false;
        boolean seenDigit = false; // used to detect invalid hex number 0xL
        radix = (ch == '0' ? 8 : 10);
        long value = ch - '0';
        count = 0;
        putc(ch);               // save character in buffer
    numberLoop:
        for (;;) {
            switch (ch = in.read()) {
              case '.':
                if (radix == 16)
                    break numberLoop; // an illegal character
                scanReal();
                return;

              case '8': case '9':
                // We can't yet throw an error if reading an octal.  We might
                // discover we're really reading a real.
                seenNonOctal = true;
                // Fall through
              case '0': case '1': case '2': case '3':
              case '4': case '5': case '6': case '7':
                seenDigit = true;
                putc(ch);
                if (radix == 10) {
                    overflow = overflow || (value * 10)/10 != value;
                    value = (value * 10) + (ch - '0');
                    overflow = overflow || (value - 1 < -1);
                } else if (radix == 8) {
                    overflow = overflow || (value >>> 61) != 0;
                    value = (value << 3) + (ch - '0');
                } else {
                    overflow = overflow || (value >>> 60) != 0;
                    value = (value << 4) + (ch - '0');
                }
                break;

              case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
                if (radix != 16) {
                    scanReal();
                    return;
                }
                // fall through
              case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
                seenDigit = true;
                putc(ch);
                if (radix != 16)
                    break numberLoop; // an illegal character
                overflow = overflow || (value >>> 60) != 0;
                value = (value << 4) + 10 +
                         Character.toLowerCase((char)ch) - 'a';
                break;

              case 'l': case 'L':
                ch = in.read(); // skip over 'l'
                longValue = value;
                token = LONGVAL;
                break numberLoop;

              case 'x': case 'X':
                // if the first character is a '0' and this is the second
                // letter, then read in a hexadecimal number.  Otherwise, error.
                if (count == 1 && radix == 8) {
                    radix = 16;
                    seenDigit = false;
                    break;
                } else {
                    // we'll get an illegal character error
                    break numberLoop;
                }

              default:
                intValue = (int)value;
                token = INTVAL;
                break numberLoop;
            }
        } // while true

        // We have just finished reading the number.  The next thing better
        // not be a letter or digit.
        // Note:  There will be deprecation warnings against these uses
        // of Character.isJavaLetterOrDigit and Character.isJavaLetter.
        // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
        if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
            env.error(in.pos, "invalid.number");
            do { ch = in.read(); }
            while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
            intValue = 0;
            token = INTVAL;
        } else if (radix == 8 && seenNonOctal) {
            // A bogus octal literal.
            intValue = 0;
            token = INTVAL;
            env.error(pos, "invalid.octal.number");
        } else if (radix == 16 && seenDigit == false) {
            // A hex literal with no digits, 0xL, for example.
            intValue = 0;
            token = INTVAL;
            env.error(pos, "invalid.hex.number");
        } else {
            if (token == INTVAL) {
                // Check for overflow.  Note that base 10 literals
                // have different rules than base 8 and 16.
                overflow = overflow ||
                    (value & 0xFFFFFFFF00000000L) != 0 ||
                    (radix == 10 && value > 2147483648L);

                if (overflow) {
                    intValue = 0;

                    // Give a specific error message which tells
                    // the user the range.
                    switch (radix) {
                      case 8:
                        env.error(pos, "overflow.int.oct");
                        break;
                      case 10:
                        env.error(pos, "overflow.int.dec");
                        break;
                      case 16:
                        env.error(pos, "overflow.int.hex");
                        break;
                      default:
                        throw new CompilerError("invalid radix");
                    }
                }
            } else {
                if (overflow) {
                    longValue = 0;

                    // Give a specific error message which tells
                    // the user the range.
                    switch (radix) {
                      case 8:
                        env.error(pos, "overflow.long.oct");
                        break;
                      case 10:
                        env.error(pos, "overflow.long.dec");
                        break;
                      case 16:
                        env.error(pos, "overflow.long.hex");
                        break;
                      default:
                        throw new CompilerError("invalid radix");
                    }
                }
            }
        }
    }

    /**
     * Scan a float.  We are either looking at the decimal point, or we
     * have already seen it and put it into the buffer.  We haven't seen
     * an exponent.  Should be called when the current character is
     * a digit, 'e', 'E', '.', or a float/double suffix.
     * Sets token to FLOATVAL or DOUBLEVAL and the matching value field.
     */
    @SuppressWarnings("fallthrough")
    private void scanReal() throws IOException {
        boolean seenExponent = false;
        boolean isSingleFloat = false;
        char lastChar;
        if (ch == '.') {
            putc(ch);
            ch = in.read();
        }

    numberLoop:
        for ( ; ; ch = in.read()) {
            switch (ch) {
              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                putc(ch);
                break;

              case 'e': case 'E':
                if (seenExponent)
                    break numberLoop; // we'll get a format error
                putc(ch);
                seenExponent = true;
                break;

              case '+': case '-':
                lastChar = buffer[count - 1];
                if (lastChar != 'e' && lastChar != 'E')
                    break numberLoop; // this isn't an error, though!
                putc(ch);
                break;

              case 'f': case 'F':
                ch = in.read(); // skip over 'f'
                isSingleFloat = true;
                break numberLoop;

              case 'd': case 'D':
                ch = in.read(); // skip over 'd'
                // fall through
              default:
                break numberLoop;
            } // switch
        } // loop

        // we have just finished reading the number.  The next thing better
        // not be a letter or digit.
        if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
            env.error(in.pos, "invalid.number");
            do { ch = in.read(); }
            while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
            doubleValue = 0;
            token = DOUBLEVAL;
        } else {
            token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
            try {
                lastChar = buffer[count - 1];
                if (lastChar == 'e' || lastChar == 'E'
                       || lastChar == '+' || lastChar == '-') {
                    // The exponent has no digits.
                    env.error(in.pos - 1, "float.format");
                } else if (isSingleFloat) {
                    String string = bufferString();
                    floatValue = Float.valueOf(string).floatValue();
                    if (Float.isInfinite(floatValue)) {
                        env.error(pos, "overflow.float");
                    } else if (floatValue == 0 && !looksLikeZero(string)) {
                        env.error(pos, "underflow.float");
                    }
                } else {
                    String string = bufferString();
                    doubleValue = Double.valueOf(string).doubleValue();
                    if (Double.isInfinite(doubleValue)) {
                        env.error(pos, "overflow.double");
                    } else if (doubleValue == 0 && !looksLikeZero(string)) {
                        env.error(pos, "underflow.double");
                    }
                }
            } catch (NumberFormatException ee) {
                env.error(pos, "float.format");
                doubleValue = 0;
                floatValue = 0;
            }
        }
    }

    /**
     * Decide whether a token that parsed as a number is written as zero.
     * Returns false as soon as a digit '1'..'9' is found before any
     * exponent or float suffix, and true otherwise; the callers use this
     * to tell a literal zero from a value that underflowed to zero.
     */
    private static boolean looksLikeZero(String token) {
        int length = token.length();
        for (int i = 0; i < length; i++) {
            switch (token.charAt(i)) {
              // Was "case 0" (the NUL character); the digit '0' is what
              // is meant.  Either way the loop just moves on.
              case '0': case '.':
                continue;
              case '1': case '2': case '3': case '4': case '5':
              case '6': case '7': case '8': case '9':
                return false;
              case 'e': case 'E': case 'f': case 'F':
                return true;
            }
        }
        return true;
    }

    /**
     * Scan an escape character.  The current character should be
     * the backslash.  An octal escape larger than 0xFF is reported
     * as "invalid.escape.char" but its value is still returned.
     * @return the character, or -1 if the escape was not recognized
     * (in which case "invalid.escape.char" has been reported).
     */
    private int scanEscapeChar() throws IOException {
        long p = in.pos;

        switch (ch = in.read()) {
          case '0': case '1': case '2': case '3':
          case '4': case '5': case '6': case '7': {
            int n = ch - '0';
            // At most two more octal digits.
            for (int i = 2 ; i > 0 ; i--) {
                switch (ch = in.read()) {
                  case '0': case '1': case '2': case '3':
                  case '4': case '5': case '6': case '7':
                    n = (n << 3) + ch - '0';
                    break;

                  default:
                    if (n > 0xFF) {
                        env.error(p, "invalid.escape.char");
                    }
                    return n;
                }
            }
            ch = in.read();
            if (n > 0xFF) {
                env.error(p, "invalid.escape.char");
            }
            return n;
          }

          case 'r':  ch = in.read(); return '\r';
          case 'n':  ch = in.read(); return '\n';
          case 'f':  ch = in.read(); return '\f';
          case 'b':  ch = in.read(); return '\b';
          case 't':  ch = in.read(); return '\t';
          case '\\': ch = in.read(); return '\\';
          case '\"': ch = in.read(); return '\"';
          case '\'': ch = in.read(); return '\'';
        }

        env.error(p, "invalid.escape.char");
        ch = in.read();
        return -1;
    }

    /**
     * Scan a string. The current character
     * should be the opening " of the string.
     * Sets token to STRINGVAL and stringValue to the text read,
     * even when the string is cut short by a newline or EOF.
     */
    private void scanString() throws IOException {
        token = STRINGVAL;
        count = 0;
        ch = in.read();

        // Scan a String
        while (true) {
            switch (ch) {
              case EOF:
                env.error(pos, "eof.in.string");
                stringValue = bufferString();
                return;

              case '\r':
              case '\n':
                ch = in.read();
                env.error(pos, "newline.in.string");
                stringValue = bufferString();
                return;

              case '"':
                ch = in.read();
                stringValue = bufferString();
                return;

              case '\\': {
                int c = scanEscapeChar();
                if (c >= 0) {
                    putc((char)c);
                }
                break;
              }

              default:
                putc(ch);
                ch = in.read();
                break;
            }
        }
    }

    /**
     * Scan a character. The current character should be
     * the opening ' of the character constant.
     * Sets token to CHARVAL and charValue to the character read
     * (0 when the constant is malformed).
     */
    private void scanCharacter() throws IOException {
        token = CHARVAL;

        switch (ch = in.read()) {
          case '\\':
            int c = scanEscapeChar();
            charValue = (char)((c >= 0) ? c : 0);
            break;

          case '\'':
            // There are two standard problems this case deals with.  One
            // is the malformed single quote constant (i.e. the programmer
            // uses ''' instead of '\'') and the other is the empty
            // character constant (i.e. '').  Just consume any number of
            // single quotes and emit an error message.
            charValue = 0;
            env.error(pos, "invalid.char.constant");
            ch = in.read();
            while (ch == '\'') {
                ch = in.read();
            }
            return;

          case '\r':
          case '\n':
            charValue = 0;
            env.error(pos, "invalid.char.constant");
            return;

          default:
            charValue = (char)ch;
            ch = in.read();
            break;
        }

        if (ch == '\'') {
            ch = in.read();
        } else {
            env.error(pos, "invalid.char.constant");
            // Error recovery: skip to the closing quote, but stop
            // at a semicolon, newline or EOF.
            while (true) {
                switch (ch) {
                  case '\'':
                    ch = in.read();
                    return;
                  case ';':
                  case '\n':
                  case EOF:
                    return;
                  default:
                    ch = in.read();
                }
            }
        }
    }

    /**
     * Scan an Identifier. The current character should
     * be the first character of the identifier.
     * Sets idValue to the identifier and token to its type.
     */
    private void scanIdentifier() throws IOException {
        count = 0;

        while (true) {
            putc(ch);
            switch (ch = in.read()) {
              case 'a': case 'b': case 'c': case 'd': case 'e':
              case 'f': case 'g': case 'h': case 'i': case 'j':
              case 'k': case 'l': case 'm': case 'n': case 'o':
              case 'p': case 'q': case 'r': case 's': case 't':
              case 'u': case 'v': case 'w': case 'x': case 'y':
              case 'z':
              case 'A': case 'B': case 'C': case 'D': case 'E':
              case 'F': case 'G': case 'H': case 'I': case 'J':
              case 'K': case 'L': case 'M': case 'N': case 'O':
              case 'P': case 'Q': case 'R': case 'S': case 'T':
              case 'U': case 'V': case 'W': case 'X': case 'Y':
              case 'Z':
              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
              case '$': case '_':
                break;

              default:
                if (!Character.isJavaLetterOrDigit((char)ch)) {
                    idValue = Identifier.lookup(bufferString());
                    token = idValue.getType();
                    return;
                }
            }
        }
    }

    /**
     * The ending position of the current token
     */
    // Note: This should be part of the pos itself.
    public long getEndPos() {
        return in.pos;
    }

    /**
     * If the current token is IDENT, return the identifier occurrence.
     * It will be freshly allocated.  Otherwise return null.
     */
    public IdentifierToken getIdToken() {
        return (token != IDENT) ? null : new IdentifierToken(pos, idValue);
    }

    /**
     * Scan the next token.
     * @return the position of the previous token.
     */
    public long scan() throws IOException {
        return xscan();
    }

    /**
     * Do the work of scan(): skip white space and (unless scanComments
     * is set) comments, then read one token, setting token, pos and the
     * value field that belongs to the token.
     * @return the position of the token that was current on entry.
     */
    @SuppressWarnings("fallthrough")
    protected long xscan() throws IOException {
        final ScannerInputReader in = this.in;
        long retPos = pos;
        prevPos = in.pos;
        docComment = null;
        while (true) {
            pos = in.pos;

            switch (ch) {
              case EOF:
                token = EOF;
                return retPos;

              case '\n':
                if (scanComments) {
                    ch = ' ';
                    // Avoid this path the next time around.
                    // Do not just call in.read; we want to present
                    // a null token (and also avoid read-ahead).
                    token = COMMENT;
                    return retPos;
                }
                // Fall through
              case ' ':
              case '\t':
              case '\f':
                ch = in.read();
                break;

              case '/':
                switch (ch = in.read()) {
                  case '/':
                    // Parse a // comment
                    while (((ch = in.read()) != EOF) && (ch != '\n'));
                    if (scanComments) {
                        token = COMMENT;
                        return retPos;
                    }
                    break;

                  case '*':
                    ch = in.read();
                    if (ch == '*') {
                        docComment = scanDocComment();
                    } else {
                        skipComment();
                    }
                    if (scanComments) {
                        // Present the comment as a token, as the "//"
                        // case does.  Without this assignment, token
                        // would keep the type of the previous token.
                        token = COMMENT;
                        return retPos;
                    }
                    break;

                  case '=':
                    ch = in.read();
                    token = ASGDIV;
                    return retPos;

                  default:
                    token = DIV;
                    return retPos;
                }
                break;

              case '"':
                scanString();
                return retPos;

              case '\'':
                scanCharacter();
                return retPos;

              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                scanNumber();
                return retPos;

              case '.':
                switch (ch = in.read()) {
                  case '0': case '1': case '2': case '3': case '4':
                  case '5': case '6': case '7': case '8': case '9':
                    // A real literal that starts with the decimal point.
                    count = 0;
                    putc('.');
                    scanReal();
                    break;
                  default:
                    token = FIELD;
                }
                return retPos;

              case '{':
                ch = in.read();
                token = LBRACE;
                return retPos;

              case '}':
                ch = in.read();
                token = RBRACE;
                return retPos;

              case '(':
                ch = in.read();
                token = LPAREN;
                return retPos;

              case ')':
                ch = in.read();
                token = RPAREN;
                return retPos;

              case '[':
                ch = in.read();
                token = LSQBRACKET;
                return retPos;

              case ']':
                ch = in.read();
                token = RSQBRACKET;
                return retPos;

              case ',':
                ch = in.read();
                token = COMMA;
                return retPos;

              case ';':
                ch = in.read();
                token = SEMICOLON;
                return retPos;

              case '?':
                ch = in.read();
                token = QUESTIONMARK;
                return retPos;

              case '~':
                ch = in.read();
                token = BITNOT;
                return retPos;

              case ':':
                ch = in.read();
                token = COLON;
                return retPos;

              case '-':
                switch (ch = in.read()) {
                  case '-':
                    ch = in.read();
                    token = DEC;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGSUB;
                    return retPos;
                }
                token = SUB;
                return retPos;

              case '+':
                switch (ch = in.read()) {
                  case '+':
                    ch = in.read();
                    token = INC;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGADD;
                    return retPos;
                }
                token = ADD;
                return retPos;

              case '<':
                switch (ch = in.read()) {
                  case '<':
                    if ((ch = in.read()) == '=') {
                        ch = in.read();
                        token = ASGLSHIFT;
                        return retPos;
                    }
                    token = LSHIFT;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = LE;
                    return retPos;
                }
                token = LT;
                return retPos;

              case '>':
                switch (ch = in.read()) {
                  case '>':
                    switch (ch = in.read()) {
                      case '=':
                        ch = in.read();
                        token = ASGRSHIFT;
                        return retPos;

                      case '>':
                        if ((ch = in.read()) == '=') {
                            ch = in.read();
                            token = ASGURSHIFT;
                            return retPos;
                        }
                        token = URSHIFT;
                        return retPos;
                    }
                    token = RSHIFT;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = GE;
                    return retPos;
                }
                token = GT;
                return retPos;

              case '|':
                switch (ch = in.read()) {
                  case '|':
                    ch = in.read();
                    token = OR;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGBITOR;
                    return retPos;
                }
                token = BITOR;
                return retPos;

              case '&':
                switch (ch = in.read()) {
                  case '&':
                    ch = in.read();
                    token = AND;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGBITAND;
                    return retPos;
                }
                token = BITAND;
                return retPos;

              case '=':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = EQ;
                    return retPos;
                }
                token = ASSIGN;
                return retPos;

              case '%':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = ASGREM;
                    return retPos;
                }
                token = REM;
                return retPos;

              case '^':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = ASGBITXOR;
                    return retPos;
                }
                token = BITXOR;
                return retPos;

              case '!':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = NE;
                    return retPos;
                }
                token = NOT;
                return retPos;

              case '*':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = ASGMUL;
                    return retPos;
                }
                token = MUL;
                return retPos;

              case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
              case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
              case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
              case 's': case 't': case 'u': case 'v': case 'w': case 'x':
              case 'y': case 'z':
              case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
              case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
              case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
              case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
              case 'Y': case 'Z':
              case '$': case '_':
                scanIdentifier();
                return retPos;

              case '\u001a':
                // Our one concession to DOS.
                if ((ch = in.read()) == EOF) {
                    token = EOF;
                    return retPos;
                }
                env.error(pos, "funny.char");
                ch = in.read();
                break;


              default:
                if (Character.isJavaLetter((char)ch)) {
                    scanIdentifier();
                    return retPos;
                }
                env.error(pos, "funny.char");
                ch = in.read();
                break;
            }
        }
    }

    /**
     * Scan to a matching '}', ']' or ')'. The current token must be
     * a '{', '[' or '('.  Nested pairs are counted.  If EOF is reached
     * first, "unbalanced.paren" is reported and the method returns.
     *
     * @param open  the token type of the opening bracket
     * @param close the token type of the closing bracket
     */
    public void match(int open, int close) throws IOException {
        int depth = 1;

        while (true) {
            scan();
            if (token == open) {
                depth++;
            } else if (token == close) {
                if (--depth == 0) {
                    return;
                }
            } else if (token == EOF) {
                env.error(pos, "unbalanced.paren");
                return;
            }
        }
    }
}