1 /* 2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.nashorn.internal.parser; 27 28 import static jdk.nashorn.internal.parser.TokenType.ADD; 29 import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER; 30 import static jdk.nashorn.internal.parser.TokenType.COMMENT; 31 import static jdk.nashorn.internal.parser.TokenType.DECIMAL; 32 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT; 33 import static jdk.nashorn.internal.parser.TokenType.EOF; 34 import static jdk.nashorn.internal.parser.TokenType.EOL; 35 import static jdk.nashorn.internal.parser.TokenType.ERROR; 36 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING; 37 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING; 38 import static jdk.nashorn.internal.parser.TokenType.FLOATING; 39 import static jdk.nashorn.internal.parser.TokenType.FUNCTION; 40 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL; 41 import static jdk.nashorn.internal.parser.TokenType.LBRACE; 42 import static jdk.nashorn.internal.parser.TokenType.LPAREN; 43 import static jdk.nashorn.internal.parser.TokenType.OCTAL; 44 import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY; 45 import static jdk.nashorn.internal.parser.TokenType.RBRACE; 46 import static jdk.nashorn.internal.parser.TokenType.REGEX; 47 import static jdk.nashorn.internal.parser.TokenType.RPAREN; 48 import static jdk.nashorn.internal.parser.TokenType.STRING; 49 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE; 50 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD; 51 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE; 52 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL; 53 import static jdk.nashorn.internal.parser.TokenType.XML; 54 55 import java.io.Serializable; 56 57 import jdk.nashorn.internal.runtime.ECMAErrors; 58 import jdk.nashorn.internal.runtime.ErrorManager; 59 import jdk.nashorn.internal.runtime.JSErrorType; 60 import jdk.nashorn.internal.runtime.JSType; 61 import jdk.nashorn.internal.runtime.ParserException; 62 import jdk.nashorn.internal.runtime.Source; 63 import jdk.nashorn.internal.runtime.options.Options; 64 65 /** 66 * Responsible for converting source content into a stream of tokens. 67 * 68 */ 69 @SuppressWarnings("fallthrough") 70 public class Lexer extends Scanner { 71 private static final long MIN_INT_L = Integer.MIN_VALUE; 72 private static final long MAX_INT_L = Integer.MAX_VALUE; 73 74 private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals"); 75 76 /** Content source. */ 77 private final Source source; 78 79 /** Buffered stream for tokens. */ 80 private final TokenStream stream; 81 82 /** True if here and edit strings are supported. */ 83 private final boolean scripting; 84 85 /** True if parsing in ECMAScript 6 mode. */ 86 private final boolean es6; 87 88 /** True if a nested scan. (scan to completion, no EOF.) */ 89 private final boolean nested; 90 91 /** Pending new line number and position. */ 92 int pendingLine; 93 94 /** Position of last EOL + 1. */ 95 private int linePosition; 96 97 /** Type of last token added. */ 98 private TokenType last; 99 100 private final boolean pauseOnFunctionBody; 101 private boolean pauseOnNextLeftBrace; 102 103 private int templateExpressionOpenBraces; 104 105 private static final String SPACETAB = " \t"; // ASCII space and tab 106 private static final String LFCR = "\n\r"; // line feed and carriage return (ctrl-m) 107 108 private static final String JAVASCRIPT_WHITESPACE_EOL = 109 LFCR + 110 "\u2028" + // line separator 111 "\u2029" // paragraph separator 112 ; 113 private static final String JAVASCRIPT_WHITESPACE = 114 SPACETAB + 115 JAVASCRIPT_WHITESPACE_EOL + 116 "\u000b" + // tabulation line 117 "\u000c" + // ff (ctrl-l) 118 "\u00a0" + // Latin-1 space 119 "\u1680" + // Ogham space mark 120 "\u180e" + // separator, Mongolian vowel 121 "\u2000" + // en quad 122 "\u2001" + // em quad 123 "\u2002" + // en space 124 "\u2003" + // em space 125 "\u2004" + // three-per-em space 126 "\u2005" + // four-per-em space 127 "\u2006" + // six-per-em space 128 "\u2007" + // figure space 129 "\u2008" + // punctuation space 130 "\u2009" + // thin space 131 "\u200a" + // hair space 132 "\u202f" + // narrow no-break space 133 "\u205f" + // medium mathematical space 134 "\u3000" + // ideographic space 135 "\ufeff" // byte order mark 136 ; 137 138 private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP = 139 "\\u000a" + // line feed 140 "\\u000d" + // carriage return (ctrl-m) 141 "\\u2028" + // line separator 142 "\\u2029" + // paragraph separator 143 "\\u0009" + // tab 144 "\\u0020" + // ASCII space 145 "\\u000b" + // tabulation line 146 "\\u000c" + // ff (ctrl-l) 147 "\\u00a0" + // Latin-1 space 148 "\\u1680" + // Ogham space mark 149 "\\u180e" + // separator, Mongolian vowel 150 "\\u2000" + // en quad 151 "\\u2001" + // em quad 152 "\\u2002" + // en space 153 "\\u2003" + // em space 154 "\\u2004" + // three-per-em space 155 "\\u2005" + // four-per-em space 156 "\\u2006" + // six-per-em space 157 "\\u2007" + // figure space 158 "\\u2008" + // punctuation space 159 "\\u2009" + // thin space 160 "\\u200a" + // hair space 161 "\\u202f" + // narrow no-break space 162 "\\u205f" + // medium mathematical space 163 "\\u3000" + // ideographic space 164 "\\ufeff" // byte order mark 165 ; 166 167 static String unicodeEscape(final char ch) { 168 final StringBuilder sb = new StringBuilder(); 169 170 sb.append("\\u"); 171 172 final String hex = Integer.toHexString(ch); 173 for (int i = hex.length(); i < 4; i++) { 174 sb.append('0'); 175 } 176 sb.append(hex); 177 178 return sb.toString(); 179 } 180 181 /** 182 * Constructor 183 * 184 * @param source the source 185 * @param stream the token stream to lex 186 */ 187 public Lexer(final Source source, final TokenStream stream) { 188 this(source, stream, false, false); 189 } 190 191 /** 192 * Constructor 193 * 194 * @param source the source 195 * @param stream the token stream to lex 196 * @param scripting are we in scripting mode 197 * @param es6 are we in ECMAScript 6 mode 198 */ 199 public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) { 200 this(source, 0, source.getLength(), stream, scripting, es6, false); 201 } 202 203 /** 204 * Constructor 205 * 206 * @param source the source 207 * @param start start position in source from which to start lexing 208 * @param len length of source segment to lex 209 * @param stream token stream to lex 210 * @param scripting are we in scripting mode 211 * @param es6 are we in ECMAScript 6 mode 212 * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a 213 * function body. This is used with the feature where the parser is skipping nested function bodies to 214 * avoid reading ahead unnecessarily when we skip the function bodies. 215 */ 216 217 public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) { 218 super(source.getContent(), 1, start, len); 219 this.source = source; 220 this.stream = stream; 221 this.scripting = scripting; 222 this.es6 = es6; 223 this.nested = false; 224 this.pendingLine = 1; 225 this.last = EOL; 226 227 this.pauseOnFunctionBody = pauseOnFunctionBody; 228 } 229 230 private Lexer(final Lexer lexer, final State state) { 231 super(lexer, state); 232 233 source = lexer.source; 234 stream = lexer.stream; 235 scripting = lexer.scripting; 236 es6 = lexer.es6; 237 nested = true; 238 239 pendingLine = state.pendingLine; 240 linePosition = state.linePosition; 241 last = EOL; 242 pauseOnFunctionBody = false; 243 } 244 245 static class State extends Scanner.State { 246 /** Pending new line number and position. */ 247 public final int pendingLine; 248 249 /** Position of last EOL + 1. */ 250 public final int linePosition; 251 252 /** Type of last token added. */ 253 public final TokenType last; 254 255 /* 256 * Constructor. 257 */ 258 259 State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) { 260 super(position, limit, line); 261 262 this.pendingLine = pendingLine; 263 this.linePosition = linePosition; 264 this.last = last; 265 } 266 } 267 268 /** 269 * Save the state of the scan. 270 * 271 * @return Captured state. 272 */ 273 @Override 274 State saveState() { 275 return new State(position, limit, line, pendingLine, linePosition, last); 276 } 277 278 /** 279 * Restore the state of the scan. 280 * 281 * @param state 282 * Captured state. 283 */ 284 void restoreState(final State state) { 285 super.restoreState(state); 286 287 pendingLine = state.pendingLine; 288 linePosition = state.linePosition; 289 last = state.last; 290 } 291 292 /** 293 * Add a new token to the stream. 294 * 295 * @param type 296 * Token type. 297 * @param start 298 * Start position. 299 * @param end 300 * End position. 301 */ 302 protected void add(final TokenType type, final int start, final int end) { 303 // Record last token. 304 last = type; 305 306 // Only emit the last EOL in a cluster. 307 if (type == EOL) { 308 pendingLine = end; 309 linePosition = start; 310 } else { 311 // Write any pending EOL to stream. 312 if (pendingLine != -1) { 313 stream.put(Token.toDesc(EOL, linePosition, pendingLine)); 314 pendingLine = -1; 315 } 316 317 // Write token to stream. 318 stream.put(Token.toDesc(type, start, end - start)); 319 } 320 } 321 322 /** 323 * Add a new token to the stream. 324 * 325 * @param type 326 * Token type. 327 * @param start 328 * Start position. 329 */ 330 protected void add(final TokenType type, final int start) { 331 add(type, start, position); 332 } 333 334 /** 335 * Return the String of valid whitespace characters for regular 336 * expressions in JavaScript 337 * @return regexp whitespace string 338 */ 339 public static String getWhitespaceRegExp() { 340 return JAVASCRIPT_WHITESPACE_IN_REGEXP; 341 } 342 343 /** 344 * Skip end of line. 345 * 346 * @param addEOL true if EOL token should be recorded. 347 */ 348 private void skipEOL(final boolean addEOL) { 349 350 if (ch0 == '\r') { // detect \r\n pattern 351 skip(1); 352 if (ch0 == '\n') { 353 skip(1); 354 } 355 } else { // all other space, ch0 is guaranteed to be EOL or \0 356 skip(1); 357 } 358 359 // bump up line count 360 line++; 361 362 if (addEOL) { 363 // Add an EOL token. 364 add(EOL, position, line); 365 } 366 } 367 368 /** 369 * Skip over rest of line including end of line. 370 * 371 * @param addEOL true if EOL token should be recorded. 372 */ 373 private void skipLine(final boolean addEOL) { 374 // Ignore characters. 375 while (!isEOL(ch0) && !atEOF()) { 376 skip(1); 377 } 378 // Skip over end of line. 379 skipEOL(addEOL); 380 } 381 382 /** 383 * Test whether a char is valid JavaScript whitespace 384 * @param ch a char 385 * @return true if valid JavaScript whitespace 386 */ 387 public static boolean isJSWhitespace(final char ch) { 388 return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1; 389 } 390 391 /** 392 * Test whether a char is valid JavaScript end of line 393 * @param ch a char 394 * @return true if valid JavaScript end of line 395 */ 396 public static boolean isJSEOL(final char ch) { 397 return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1; 398 } 399 400 /** 401 * Test if char is a string delimiter, e.g. '\' or '"'. 402 * @param ch a char 403 * @return true if string delimiter 404 */ 405 protected boolean isStringDelimiter(final char ch) { 406 return ch == '\'' || ch == '"'; 407 } 408 409 /** 410 * Test if char is a template literal delimiter ('`'). 411 */ 412 private static boolean isTemplateDelimiter(char ch) { 413 return ch == '`'; 414 } 415 416 /** 417 * Test whether a char is valid JavaScript whitespace 418 * @param ch a char 419 * @return true if valid JavaScript whitespace 420 */ 421 protected boolean isWhitespace(final char ch) { 422 return Lexer.isJSWhitespace(ch); 423 } 424 425 /** 426 * Test whether a char is valid JavaScript end of line 427 * @param ch a char 428 * @return true if valid JavaScript end of line 429 */ 430 protected boolean isEOL(final char ch) { 431 return Lexer.isJSEOL(ch); 432 } 433 434 /** 435 * Skip over whitespace and detect end of line, adding EOL tokens if 436 * encountered. 437 * 438 * @param addEOL true if EOL tokens should be recorded. 439 */ 440 private void skipWhitespace(final boolean addEOL) { 441 while (isWhitespace(ch0)) { 442 if (isEOL(ch0)) { 443 skipEOL(addEOL); 444 } else { 445 skip(1); 446 } 447 } 448 } 449 450 /** 451 * Skip over comments. 452 * 453 * @return True if a comment. 454 */ 455 protected boolean skipComments() { 456 // Save the current position. 457 final int start = position; 458 459 if (ch0 == '/') { 460 // Is it a // comment. 461 if (ch1 == '/') { 462 // Skip over //. 463 skip(2); 464 465 boolean directiveComment = false; 466 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) { 467 directiveComment = true; 468 } 469 470 // Scan for EOL. 471 while (!atEOF() && !isEOL(ch0)) { 472 skip(1); 473 } 474 // Did detect a comment. 475 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start); 476 return true; 477 } else if (ch1 == '*') { 478 // Skip over /*. 479 skip(2); 480 // Scan for */. 481 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) { 482 // If end of line handle else skip character. 483 if (isEOL(ch0)) { 484 skipEOL(true); 485 } else { 486 skip(1); 487 } 488 } 489 490 if (atEOF()) { 491 // TODO - Report closing */ missing in parser. 492 add(ERROR, start); 493 } else { 494 // Skip */. 495 skip(2); 496 } 497 498 // Did detect a comment. 499 add(COMMENT, start); 500 return true; 501 } 502 } else if (ch0 == '#') { 503 assert scripting; 504 // shell style comment 505 // Skip over #. 506 skip(1); 507 // Scan for EOL. 508 while (!atEOF() && !isEOL(ch0)) { 509 skip(1); 510 } 511 // Did detect a comment. 512 add(COMMENT, start); 513 return true; 514 } 515 516 // Not a comment. 517 return false; 518 } 519 520 /** 521 * Convert a regex token to a token object. 522 * 523 * @param start Position in source content. 524 * @param length Length of regex token. 525 * @return Regex token object. 526 */ 527 public RegexToken valueOfPattern(final int start, final int length) { 528 // Save the current position. 529 final int savePosition = position; 530 // Reset to beginning of content. 531 reset(start); 532 // Buffer for recording characters. 533 final StringBuilder sb = new StringBuilder(length); 534 535 // Skip /. 536 skip(1); 537 boolean inBrackets = false; 538 // Scan for closing /, stopping at end of line. 539 while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) { 540 // Skip over escaped character. 541 if (ch0 == '\\') { 542 sb.append(ch0); 543 sb.append(ch1); 544 skip(2); 545 } else { 546 if (ch0 == '[') { 547 inBrackets = true; 548 } else if (ch0 == ']') { 549 inBrackets = false; 550 } 551 552 // Skip literal character. 553 sb.append(ch0); 554 skip(1); 555 } 556 } 557 558 // Get pattern as string. 559 final String regex = sb.toString(); 560 561 // Skip /. 562 skip(1); 563 564 // Options as string. 565 final String options = source.getString(position, scanIdentifier()); 566 567 reset(savePosition); 568 569 // Compile the pattern. 570 return new RegexToken(regex, options); 571 } 572 573 /** 574 * Return true if the given token can be the beginning of a literal. 575 * 576 * @param token a token 577 * @return true if token can start a literal. 578 */ 579 public boolean canStartLiteral(final TokenType token) { 580 return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<')); 581 } 582 583 /** 584 * interface to receive line information for multi-line literals. 585 */ 586 protected interface LineInfoReceiver { 587 /** 588 * Receives line information 589 * @param line last line number 590 * @param linePosition position of last line 591 */ 592 public void lineInfo(int line, int linePosition); 593 } 594 595 /** 596 * Check whether the given token represents the beginning of a literal. If so scan 597 * the literal and return <tt>true</tt>, otherwise return false. 598 * 599 * @param token the token. 600 * @param startTokenType the token type. 601 * @param lir LineInfoReceiver that receives line info for multi-line string literals. 602 * @return True if a literal beginning with startToken was found and scanned. 603 */ 604 protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) { 605 // Check if it can be a literal. 606 if (!canStartLiteral(startTokenType)) { 607 return false; 608 } 609 // We break on ambiguous tokens so if we already moved on it can't be a literal. 610 if (stream.get(stream.last()) != token) { 611 return false; 612 } 613 // Rewind to token start position 614 reset(Token.descPosition(token)); 615 616 if (ch0 == '/') { 617 return scanRegEx(); 618 } else if (ch0 == '<') { 619 if (ch1 == '<') { 620 return scanHereString(lir); 621 } else if (Character.isJavaIdentifierStart(ch1)) { 622 return scanXMLLiteral(); 623 } 624 } 625 626 return false; 627 } 628 629 /** 630 * Scan over regex literal. 631 * 632 * @return True if a regex literal. 633 */ 634 private boolean scanRegEx() { 635 assert ch0 == '/'; 636 // Make sure it's not a comment. 637 if (ch1 != '/' && ch1 != '*') { 638 // Record beginning of literal. 639 final int start = position; 640 // Skip /. 641 skip(1); 642 boolean inBrackets = false; 643 644 // Scan for closing /, stopping at end of line. 645 while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) { 646 // Skip over escaped character. 647 if (ch0 == '\\') { 648 skip(1); 649 if (isEOL(ch0)) { 650 reset(start); 651 return false; 652 } 653 skip(1); 654 } else { 655 if (ch0 == '[') { 656 inBrackets = true; 657 } else if (ch0 == ']') { 658 inBrackets = false; 659 } 660 661 // Skip literal character. 662 skip(1); 663 } 664 } 665 666 // If regex literal. 667 if (ch0 == '/') { 668 // Skip /. 669 skip(1); 670 671 // Skip over options. 672 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') { 673 skip(1); 674 } 675 676 // Add regex token. 677 add(REGEX, start); 678 // Regex literal detected. 679 return true; 680 } 681 682 // False start try again. 683 reset(start); 684 } 685 686 // Regex literal not detected. 687 return false; 688 } 689 690 /** 691 * Convert a digit to a integer. Can't use Character.digit since we are 692 * restricted to ASCII by the spec. 693 * 694 * @param ch Character to convert. 695 * @param base Numeric base. 696 * 697 * @return The converted digit or -1 if invalid. 698 */ 699 protected static int convertDigit(final char ch, final int base) { 700 int digit; 701 702 if ('0' <= ch && ch <= '9') { 703 digit = ch - '0'; 704 } else if ('A' <= ch && ch <= 'Z') { 705 digit = ch - 'A' + 10; 706 } else if ('a' <= ch && ch <= 'z') { 707 digit = ch - 'a' + 10; 708 } else { 709 return -1; 710 } 711 712 return digit < base ? digit : -1; 713 } 714 715 716 /** 717 * Get the value of a hexadecimal numeric sequence. 718 * 719 * @param length Number of digits. 720 * @param type Type of token to report against. 721 * @return Value of sequence or < 0 if no digits. 722 */ 723 private int hexSequence(final int length, final TokenType type) { 724 int value = 0; 725 726 for (int i = 0; i < length; i++) { 727 final int digit = convertDigit(ch0, 16); 728 729 if (digit == -1) { 730 error(Lexer.message("invalid.hex"), type, position, limit); 731 return i == 0 ? -1 : value; 732 } 733 734 value = digit | value << 4; 735 skip(1); 736 } 737 738 return value; 739 } 740 741 /** 742 * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255. 743 * 744 * @return Value of sequence. 745 */ 746 private int octalSequence() { 747 int value = 0; 748 749 for (int i = 0; i < 3; i++) { 750 final int digit = convertDigit(ch0, 8); 751 752 if (digit == -1) { 753 break; 754 } 755 value = digit | value << 3; 756 skip(1); 757 758 if (i == 1 && value >= 32) { 759 break; 760 } 761 } 762 return value; 763 } 764 765 /** 766 * Convert a string to a JavaScript identifier. 767 * 768 * @param start Position in source content. 769 * @param length Length of token. 770 * @return Ident string or null if an error. 771 */ 772 private String valueOfIdent(final int start, final int length) throws RuntimeException { 773 // Save the current position. 774 final int savePosition = position; 775 // End of scan. 776 final int end = start + length; 777 // Reset to beginning of content. 778 reset(start); 779 // Buffer for recording characters. 780 final StringBuilder sb = new StringBuilder(length); 781 782 // Scan until end of line or end of file. 783 while (!atEOF() && position < end && !isEOL(ch0)) { 784 // If escape character. 785 if (ch0 == '\\' && ch1 == 'u') { 786 skip(2); 787 final int ch = hexSequence(4, TokenType.IDENT); 788 if (isWhitespace((char)ch)) { 789 return null; 790 } 791 if (ch < 0) { 792 sb.append('\\'); 793 sb.append('u'); 794 } else { 795 sb.append((char)ch); 796 } 797 } else { 798 // Add regular character. 799 sb.append(ch0); 800 skip(1); 801 } 802 } 803 804 // Restore position. 805 reset(savePosition); 806 807 return sb.toString(); 808 } 809 810 /** 811 * Scan over and identifier or keyword. Handles identifiers containing 812 * encoded Unicode chars. 813 * 814 * Example: 815 * 816 * var \u0042 = 44; 817 */ 818 private void scanIdentifierOrKeyword() { 819 // Record beginning of identifier. 820 final int start = position; 821 // Scan identifier. 822 final int length = scanIdentifier(); 823 // Check to see if it is a keyword. 824 final TokenType type = TokenLookup.lookupKeyword(content, start, length); 825 if (type == FUNCTION && pauseOnFunctionBody) { 826 pauseOnNextLeftBrace = true; 827 } 828 // Add keyword or identifier token. 829 add(type, start); 830 } 831 832 /** 833 * Convert a string to a JavaScript string object. 834 * 835 * @param start Position in source content. 836 * @param length Length of token. 837 * @return JavaScript string object. 838 */ 839 private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException { 840 // Save the current position. 841 final int savePosition = position; 842 // Calculate the end position. 843 final int end = start + length; 844 // Reset to beginning of string. 845 reset(start); 846 847 // Buffer for recording characters. 848 final StringBuilder sb = new StringBuilder(length); 849 850 // Scan until end of string. 851 while (position < end) { 852 // If escape character. 853 if (ch0 == '\\') { 854 skip(1); 855 856 final char next = ch0; 857 final int afterSlash = position; 858 859 skip(1); 860 861 // Special characters. 862 switch (next) { 863 case '0': 864 case '1': 865 case '2': 866 case '3': 867 case '4': 868 case '5': 869 case '6': 870 case '7': { 871 if (strict) { 872 // "\0" itself is allowed in strict mode. Only other 'real' 873 // octal escape sequences are not allowed (eg. "\02", "\31"). 874 // See section 7.8.4 String literals production EscapeSequence 875 if (next != '0' || (ch0 >= '0' && ch0 <= '9')) { 876 error(Lexer.message("strict.no.octal"), STRING, position, limit); 877 } 878 } 879 reset(afterSlash); 880 // Octal sequence. 881 final int ch = octalSequence(); 882 883 if (ch < 0) { 884 sb.append('\\'); 885 sb.append('x'); 886 } else { 887 sb.append((char)ch); 888 } 889 break; 890 } 891 case 'n': 892 sb.append('\n'); 893 break; 894 case 't': 895 sb.append('\t'); 896 break; 897 case 'b': 898 sb.append('\b'); 899 break; 900 case 'f': 901 sb.append('\f'); 902 break; 903 case 'r': 904 sb.append('\r'); 905 break; 906 case '\'': 907 sb.append('\''); 908 break; 909 case '\"': 910 sb.append('\"'); 911 break; 912 case '\\': 913 sb.append('\\'); 914 break; 915 case '\r': // CR | CRLF 916 if (ch0 == '\n') { 917 skip(1); 918 } 919 // fall through 920 case '\n': // LF 921 case '\u2028': // LS 922 case '\u2029': // PS 923 // continue on the next line, slash-return continues string 924 // literal 925 break; 926 case 'x': { 927 // Hex sequence. 928 final int ch = hexSequence(2, STRING); 929 930 if (ch < 0) { 931 sb.append('\\'); 932 sb.append('x'); 933 } else { 934 sb.append((char)ch); 935 } 936 } 937 break; 938 case 'u': { 939 // Unicode sequence. 940 final int ch = hexSequence(4, STRING); 941 942 if (ch < 0) { 943 sb.append('\\'); 944 sb.append('u'); 945 } else { 946 sb.append((char)ch); 947 } 948 } 949 break; 950 case 'v': 951 sb.append('\u000B'); 952 break; 953 // All other characters. 954 default: 955 sb.append(next); 956 break; 957 } 958 } else if (ch0 == '\r') { 959 // Convert CR-LF or CR to LF line terminator. 960 sb.append('\n'); 961 skip(ch1 == '\n' ? 2 : 1); 962 } else { 963 // Add regular character. 964 sb.append(ch0); 965 skip(1); 966 } 967 } 968 969 // Restore position. 970 reset(savePosition); 971 972 return sb.toString(); 973 } 974 975 /** 976 * Scan over a string literal. 977 * @param add true if we are not just scanning but should actually modify the token stream 978 */ 979 protected void scanString(final boolean add) { 980 // Type of string. 981 TokenType type = STRING; 982 // Record starting quote. 983 final char quote = ch0; 984 // Skip over quote. 985 skip(1); 986 987 // Record beginning of string content. 988 final State stringState = saveState(); 989 990 // Scan until close quote or end of line. 991 while (!atEOF() && ch0 != quote && !isEOL(ch0)) { 992 // Skip over escaped character. 993 if (ch0 == '\\') { 994 type = ESCSTRING; 995 skip(1); 996 if (! isEscapeCharacter(ch0)) { 997 error(Lexer.message("invalid.escape.char"), STRING, position, limit); 998 } 999 if (isEOL(ch0)) { 1000 // Multiline string literal 1001 skipEOL(false); 1002 continue; 1003 } 1004 } 1005 // Skip literal character. 1006 skip(1); 1007 } 1008 1009 // If close quote. 1010 if (ch0 == quote) { 1011 // Skip close quote. 1012 skip(1); 1013 } else { 1014 error(Lexer.message("missing.close.quote"), STRING, position, limit); 1015 } 1016 1017 // If not just scanning. 1018 if (add) { 1019 // Record end of string. 1020 stringState.setLimit(position - 1); 1021 1022 if (scripting && !stringState.isEmpty()) { 1023 switch (quote) { 1024 case '`': 1025 // Mark the beginning of an exec string. 1026 add(EXECSTRING, stringState.position, stringState.limit); 1027 // Frame edit string with left brace. 1028 add(LBRACE, stringState.position, stringState.position); 1029 // Process edit string. 1030 editString(type, stringState); 1031 // Frame edit string with right brace. 1032 add(RBRACE, stringState.limit, stringState.limit); 1033 break; 1034 case '"': 1035 // Only edit double quoted strings. 1036 editString(type, stringState); 1037 break; 1038 case '\'': 1039 // Add string token without editing. 1040 add(type, stringState.position, stringState.limit); 1041 break; 1042 default: 1043 break; 1044 } 1045 } else { 1046 /// Add string token without editing. 1047 add(type, stringState.position, stringState.limit); 1048 } 1049 } 1050 } 1051 1052 /** 1053 * Scan over a template string literal. 1054 */ 1055 private void scanTemplate() { 1056 assert ch0 == '`'; 1057 TokenType type = TEMPLATE; 1058 1059 // Skip over quote and record beginning of string content. 1060 skip(1); 1061 State stringState = saveState(); 1062 1063 // Scan until close quote 1064 while (!atEOF()) { 1065 // Skip over escaped character. 1066 if (ch0 == '`') { 1067 skip(1); 1068 // Record end of string. 1069 stringState.setLimit(position - 1); 1070 add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit); 1071 return; 1072 } else if (ch0 == '$' && ch1 == '{') { 1073 skip(2); 1074 stringState.setLimit(position - 2); 1075 add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit); 1076 1077 // scan to RBRACE 1078 Lexer expressionLexer = new Lexer(this, saveState()); 1079 expressionLexer.templateExpressionOpenBraces = 1; 1080 expressionLexer.lexify(); 1081 restoreState(expressionLexer.saveState()); 1082 1083 // scan next middle or tail of the template literal 1084 assert ch0 == '}'; 1085 type = TEMPLATE_MIDDLE; 1086 1087 // Skip over rbrace and record beginning of string content. 1088 skip(1); 1089 stringState = saveState(); 1090 1091 continue; 1092 } else if (ch0 == '\\') { 1093 skip(1); 1094 // EscapeSequence 1095 if (!isEscapeCharacter(ch0)) { 1096 error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit); 1097 } 1098 if (isEOL(ch0)) { 1099 // LineContinuation 1100 skipEOL(false); 1101 continue; 1102 } 1103 } else if (isEOL(ch0)) { 1104 // LineTerminatorSequence 1105 skipEOL(false); 1106 continue; 1107 } 1108 1109 // Skip literal character. 1110 skip(1); 1111 } 1112 1113 error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit); 1114 } 1115 1116 /** 1117 * Is the given character a valid escape char after "\" ? 1118 * 1119 * @param ch character to be checked 1120 * @return if the given character is valid after "\" 1121 */ 1122 protected boolean isEscapeCharacter(final char ch) { 1123 return true; 1124 } 1125 1126 /** 1127 * Convert string to number. 1128 * 1129 * @param valueString String to convert. 1130 * @param radix Numeric base. 1131 * @return Converted number. 1132 */ 1133 private static Number valueOf(final String valueString, final int radix) throws NumberFormatException { 1134 try { 1135 final long value = Long.parseLong(valueString, radix); 1136 if(value >= MIN_INT_L && value <= MAX_INT_L) { 1137 return (int)value; 1138 } 1139 return value; 1140 } catch (final NumberFormatException e) { 1141 if (radix == 10) { 1142 return Double.valueOf(valueString); 1143 } 1144 1145 double value = 0.0; 1146 1147 for (int i = 0; i < valueString.length(); i++) { 1148 final char ch = valueString.charAt(i); 1149 // Preverified, should always be a valid digit. 1150 final int digit = convertDigit(ch, radix); 1151 value *= radix; 1152 value += digit; 1153 } 1154 1155 return value; 1156 } 1157 } 1158 1159 /** 1160 * Scan a number. 1161 */ 1162 protected void scanNumber() { 1163 // Record beginning of number. 1164 final int start = position; 1165 // Assume value is a decimal. 1166 TokenType type = DECIMAL; 1167 1168 // First digit of number. 1169 int digit = convertDigit(ch0, 10); 1170 1171 // If number begins with 0x. 1172 if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) { 1173 // Skip over 0xN. 1174 skip(3); 1175 // Skip over remaining digits. 1176 while (convertDigit(ch0, 16) != -1) { 1177 skip(1); 1178 } 1179 1180 type = HEXADECIMAL; 1181 } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) { 1182 // Skip over 0oN. 1183 skip(3); 1184 // Skip over remaining digits. 1185 while (convertDigit(ch0, 8) != -1) { 1186 skip(1); 1187 } 1188 1189 type = OCTAL; 1190 } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) { 1191 // Skip over 0bN. 1192 skip(3); 1193 // Skip over remaining digits. 1194 while (convertDigit(ch0, 2) != -1) { 1195 skip(1); 1196 } 1197 1198 type = BINARY_NUMBER; 1199 } else { 1200 // Check for possible octal constant. 1201 boolean octal = digit == 0; 1202 // Skip first digit if not leading '.'. 1203 if (digit != -1) { 1204 skip(1); 1205 } 1206 1207 // Skip remaining digits. 1208 while ((digit = convertDigit(ch0, 10)) != -1) { 1209 // Check octal only digits. 1210 octal = octal && digit < 8; 1211 // Skip digit. 1212 skip(1); 1213 } 1214 1215 if (octal && position - start > 1) { 1216 type = OCTAL_LEGACY; 1217 } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') { 1218 // Must be a double. 1219 if (ch0 == '.') { 1220 // Skip period. 1221 skip(1); 1222 // Skip mantissa. 1223 while (convertDigit(ch0, 10) != -1) { 1224 skip(1); 1225 } 1226 } 1227 1228 // Detect exponent. 1229 if (ch0 == 'E' || ch0 == 'e') { 1230 // Skip E. 1231 skip(1); 1232 // Detect and skip exponent sign. 1233 if (ch0 == '+' || ch0 == '-') { 1234 skip(1); 1235 } 1236 // Skip exponent. 1237 while (convertDigit(ch0, 10) != -1) { 1238 skip(1); 1239 } 1240 } 1241 1242 type = FLOATING; 1243 } 1244 } 1245 1246 if (Character.isJavaIdentifierStart(ch0)) { 1247 error(Lexer.message("missing.space.after.number"), type, position, 1); 1248 } 1249 1250 // Add number token. 1251 add(type, start); 1252 } 1253 1254 /** 1255 * Convert a regex token to a token object. 1256 * 1257 * @param start Position in source content. 1258 * @param length Length of regex token. 1259 * @return Regex token object. 1260 */ 1261 XMLToken valueOfXML(final int start, final int length) { 1262 return new XMLToken(source.getString(start, length)); 1263 } 1264 1265 /** 1266 * Scan over a XML token. 1267 * 1268 * @return TRUE if is an XML literal. 1269 */ 1270 private boolean scanXMLLiteral() { 1271 assert ch0 == '<' && Character.isJavaIdentifierStart(ch1); 1272 if (XML_LITERALS) { 1273 // Record beginning of xml expression. 1274 final int start = position; 1275 1276 int openCount = 0; 1277 1278 do { 1279 if (ch0 == '<') { 1280 if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) { 1281 skip(3); 1282 openCount--; 1283 } else if (Character.isJavaIdentifierStart(ch1)) { 1284 skip(2); 1285 openCount++; 1286 } else if (ch1 == '?') { 1287 skip(2); 1288 } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') { 1289 skip(4); 1290 } else { 1291 reset(start); 1292 return false; 1293 } 1294 1295 while (!atEOF() && ch0 != '>') { 1296 if (ch0 == '/' && ch1 == '>') { 1297 openCount--; 1298 skip(1); 1299 break; 1300 } else if (ch0 == '\"' || ch0 == '\'') { 1301 scanString(false); 1302 } else { 1303 skip(1); 1304 } 1305 } 1306 1307 if (ch0 != '>') { 1308 reset(start); 1309 return false; 1310 } 1311 1312 skip(1); 1313 } else if (atEOF()) { 1314 reset(start); 1315 return false; 1316 } else { 1317 skip(1); 1318 } 1319 } while (openCount > 0); 1320 1321 add(XML, start); 1322 return true; 1323 } 1324 1325 return false; 1326 } 1327 1328 /** 1329 * Scan over identifier characters. 1330 * 1331 * @return Length of identifier or zero if none found. 1332 */ 1333 private int scanIdentifier() { 1334 final int start = position; 1335 1336 // Make sure first character is valid start character. 1337 if (ch0 == '\\' && ch1 == 'u') { 1338 skip(2); 1339 final int ch = hexSequence(4, TokenType.IDENT); 1340 1341 if (!Character.isJavaIdentifierStart(ch)) { 1342 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1343 } 1344 } else if (!Character.isJavaIdentifierStart(ch0)) { 1345 // Not an identifier. 1346 return 0; 1347 } 1348 1349 // Make sure remaining characters are valid part characters. 1350 while (!atEOF()) { 1351 if (ch0 == '\\' && ch1 == 'u') { 1352 skip(2); 1353 final int ch = hexSequence(4, TokenType.IDENT); 1354 1355 if (!Character.isJavaIdentifierPart(ch)) { 1356 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1357 } 1358 } else if (Character.isJavaIdentifierPart(ch0)) { 1359 skip(1); 1360 } else { 1361 break; 1362 } 1363 } 1364 1365 // Length of identifier sequence. 1366 return position - start; 1367 } 1368 1369 /** 1370 * Compare two identifiers (in content) for equality. 1371 * 1372 * @param aStart Start of first identifier. 1373 * @param aLength Length of first identifier. 1374 * @param bStart Start of second identifier. 1375 * @param bLength Length of second identifier. 1376 * @return True if equal. 1377 */ 1378 private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) { 1379 if (aLength == bLength) { 1380 for (int i = 0; i < aLength; i++) { 1381 if (content[aStart + i] != content[bStart + i]) { 1382 return false; 1383 } 1384 } 1385 1386 return true; 1387 } 1388 1389 return false; 1390 } 1391 1392 /** 1393 * Detect if a line starts with a marker identifier. 1394 * 1395 * @param identStart Start of identifier. 1396 * @param identLength Length of identifier. 1397 * @return True if detected. 1398 */ 1399 private boolean hasHereMarker(final int identStart, final int identLength) { 1400 // Skip any whitespace. 1401 skipWhitespace(false); 1402 1403 return identifierEqual(identStart, identLength, position, scanIdentifier()); 1404 } 1405 1406 /** 1407 * Lexer to service edit strings. 1408 */ 1409 private static class EditStringLexer extends Lexer { 1410 /** Type of string literals to emit. */ 1411 final TokenType stringType; 1412 1413 /* 1414 * Constructor. 1415 */ 1416 1417 EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) { 1418 super(lexer, stringState); 1419 1420 this.stringType = stringType; 1421 } 1422 1423 /** 1424 * Lexify the contents of the string. 1425 */ 1426 @Override 1427 public void lexify() { 1428 // Record start of string position. 1429 int stringStart = position; 1430 // Indicate that the priming first string has not been emitted. 1431 boolean primed = false; 1432 1433 while (true) { 1434 // Detect end of content. 1435 if (atEOF()) { 1436 break; 1437 } 1438 1439 // Honour escapes (should be well formed.) 1440 if (ch0 == '\\' && stringType == ESCSTRING) { 1441 skip(2); 1442 1443 continue; 1444 } 1445 1446 // If start of expression. 1447 if (ch0 == '$' && ch1 == '{') { 1448 if (!primed || stringStart != position) { 1449 if (primed) { 1450 add(ADD, stringStart, stringStart + 1); 1451 } 1452 1453 add(stringType, stringStart, position); 1454 primed = true; 1455 } 1456 1457 // Skip ${ 1458 skip(2); 1459 1460 // Save expression state. 1461 final State expressionState = saveState(); 1462 1463 // Start with one open brace. 1464 int braceCount = 1; 1465 1466 // Scan for the rest of the string. 1467 while (!atEOF()) { 1468 // If closing brace. 1469 if (ch0 == '}') { 1470 // Break only only if matching brace. 1471 if (--braceCount == 0) { 1472 break; 1473 } 1474 } else if (ch0 == '{') { 1475 // Bump up the brace count. 1476 braceCount++; 1477 } 1478 1479 // Skip to next character. 1480 skip(1); 1481 } 1482 1483 // If braces don't match then report an error. 1484 if (braceCount != 0) { 1485 error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1); 1486 } 1487 1488 // Mark end of expression. 1489 expressionState.setLimit(position); 1490 // Skip closing brace. 1491 skip(1); 1492 1493 // Start next string. 1494 stringStart = position; 1495 1496 // Concatenate expression. 1497 add(ADD, expressionState.position, expressionState.position + 1); 1498 add(LPAREN, expressionState.position, expressionState.position + 1); 1499 1500 // Scan expression. 1501 final Lexer lexer = new Lexer(this, expressionState); 1502 lexer.lexify(); 1503 1504 // Close out expression parenthesis. 1505 add(RPAREN, position - 1, position); 1506 1507 continue; 1508 } 1509 1510 // Next character in string. 1511 skip(1); 1512 } 1513 1514 // If there is any unemitted string portion. 1515 if (stringStart != limit) { 1516 // Concatenate remaining string. 1517 if (primed) { 1518 add(ADD, stringStart, 1); 1519 } 1520 1521 add(stringType, stringStart, limit); 1522 } 1523 } 1524 1525 } 1526 1527 /** 1528 * Edit string for nested expressions. 1529 * 1530 * @param stringType Type of string literals to emit. 1531 * @param stringState State of lexer at start of string. 1532 */ 1533 private void editString(final TokenType stringType, final State stringState) { 1534 // Use special lexer to scan string. 1535 final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState); 1536 lexer.lexify(); 1537 1538 // Need to keep lexer informed. 1539 last = stringType; 1540 } 1541 1542 /** 1543 * Scan over a here string. 1544 * 1545 * @return TRUE if is a here string. 1546 */ 1547 private boolean scanHereString(final LineInfoReceiver lir) { 1548 assert ch0 == '<' && ch1 == '<'; 1549 if (scripting) { 1550 // Record beginning of here string. 1551 final State saved = saveState(); 1552 1553 // << or <<< 1554 final boolean excludeLastEOL = ch2 != '<'; 1555 1556 if (excludeLastEOL) { 1557 skip(2); 1558 } else { 1559 skip(3); 1560 } 1561 1562 // Scan identifier. It might be quoted, indicating that no string editing should take place. 1563 final char quoteChar = ch0; 1564 final boolean noStringEditing = quoteChar == '"' || quoteChar == '\''; 1565 if (noStringEditing) { 1566 skip(1); 1567 } 1568 final int identStart = position; 1569 final int identLength = scanIdentifier(); 1570 if (noStringEditing) { 1571 if (ch0 != quoteChar) { 1572 error(Lexer.message("here.non.matching.delimiter"), last, position, position); 1573 restoreState(saved); 1574 return false; 1575 } 1576 skip(1); 1577 } 1578 1579 // Check for identifier. 1580 if (identLength == 0) { 1581 // Treat as shift. 1582 restoreState(saved); 1583 1584 return false; 1585 } 1586 1587 // Record rest of line. 1588 final State restState = saveState(); 1589 // keep line number updated 1590 int lastLine = line; 1591 1592 skipLine(false); 1593 lastLine++; 1594 int lastLinePosition = position; 1595 restState.setLimit(position); 1596 1597 // Record beginning of string. 1598 final State stringState = saveState(); 1599 int stringEnd = position; 1600 1601 // Hunt down marker. 1602 while (!atEOF()) { 1603 // Skip any whitespace. 1604 skipWhitespace(false); 1605 1606 if (hasHereMarker(identStart, identLength)) { 1607 break; 1608 } 1609 1610 skipLine(false); 1611 lastLine++; 1612 lastLinePosition = position; 1613 stringEnd = position; 1614 } 1615 1616 // notify last line information 1617 lir.lineInfo(lastLine, lastLinePosition); 1618 1619 // Record end of string. 1620 stringState.setLimit(stringEnd); 1621 1622 // If marker is missing. 1623 if (stringState.isEmpty() || atEOF()) { 1624 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position); 1625 restoreState(saved); 1626 1627 return false; 1628 } 1629 1630 // Remove last end of line if specified. 1631 if (excludeLastEOL) { 1632 // Handles \n. 1633 if (content[stringEnd - 1] == '\n') { 1634 stringEnd--; 1635 } 1636 1637 // Handles \r and \r\n. 1638 if (content[stringEnd - 1] == '\r') { 1639 stringEnd--; 1640 } 1641 1642 // Update end of string. 1643 stringState.setLimit(stringEnd); 1644 } 1645 1646 // Edit string if appropriate. 1647 if (!noStringEditing && !stringState.isEmpty()) { 1648 editString(STRING, stringState); 1649 } else { 1650 // Add here string. 1651 add(STRING, stringState.position, stringState.limit); 1652 } 1653 1654 // Scan rest of original line. 1655 final Lexer restLexer = new Lexer(this, restState); 1656 1657 restLexer.lexify(); 1658 1659 return true; 1660 } 1661 1662 return false; 1663 } 1664 1665 /** 1666 * Breaks source content down into lex units, adding tokens to the token 1667 * stream. The routine scans until the stream buffer is full. Can be called 1668 * repeatedly until EOF is detected. 1669 */ 1670 public void lexify() { 1671 while (!stream.isFull() || nested) { 1672 // Skip over whitespace. 1673 skipWhitespace(true); 1674 1675 // Detect end of file. 1676 if (atEOF()) { 1677 if (!nested) { 1678 // Add an EOF token at the end. 1679 add(EOF, position); 1680 } 1681 1682 break; 1683 } 1684 1685 // Check for comments. Note that we don't scan for regexp and other literals here as 1686 // we may not have enough context to distinguish them from similar looking operators. 1687 // Instead we break on ambiguous operators below and let the parser decide. 1688 if (ch0 == '/' && skipComments()) { 1689 continue; 1690 } 1691 1692 if (scripting && ch0 == '#' && skipComments()) { 1693 continue; 1694 } 1695 1696 // TokenType for lookup of delimiter or operator. 1697 TokenType type; 1698 1699 if (ch0 == '.' && convertDigit(ch1, 10) != -1) { 1700 // '.' followed by digit. 1701 // Scan and add a number. 1702 scanNumber(); 1703 } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) { 1704 if (templateExpressionOpenBraces > 0) { 1705 if (type == LBRACE) { 1706 templateExpressionOpenBraces++; 1707 } else if (type == RBRACE) { 1708 if (--templateExpressionOpenBraces == 0) { 1709 break; 1710 } 1711 } 1712 } 1713 1714 // Get the number of characters in the token. 1715 final int typeLength = type.getLength(); 1716 // Skip that many characters. 1717 skip(typeLength); 1718 // Add operator token. 1719 add(type, position - typeLength); 1720 // Some operator tokens also mark the beginning of regexp, XML, or here string literals. 1721 // We break to let the parser decide what it is. 1722 if (canStartLiteral(type)) { 1723 break; 1724 } else if (type == LBRACE && pauseOnNextLeftBrace) { 1725 pauseOnNextLeftBrace = false; 1726 break; 1727 } 1728 } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') { 1729 // Scan and add identifier or keyword. 1730 scanIdentifierOrKeyword(); 1731 } else if (isStringDelimiter(ch0)) { 1732 // Scan and add a string. 1733 scanString(true); 1734 } else if (Character.isDigit(ch0)) { 1735 // Scan and add a number. 1736 scanNumber(); 1737 } else if (isTemplateDelimiter(ch0) && es6) { 1738 // Scan and add template in ES6 mode. 1739 scanTemplate(); 1740 } else if (isTemplateDelimiter(ch0) && scripting) { 1741 // Scan and add an exec string ('`') in scripting mode. 1742 scanString(true); 1743 } else { 1744 // Don't recognize this character. 1745 skip(1); 1746 add(ERROR, position - 1); 1747 } 1748 } 1749 } 1750 1751 /** 1752 * Return value of token given its token descriptor. 1753 * 1754 * @param token Token descriptor. 1755 * @return JavaScript value. 1756 */ 1757 Object getValueOf(final long token, final boolean strict) { 1758 final int start = Token.descPosition(token); 1759 final int len = Token.descLength(token); 1760 1761 switch (Token.descType(token)) { 1762 case DECIMAL: 1763 return Lexer.valueOf(source.getString(start, len), 10); // number 1764 case HEXADECIMAL: 1765 return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number 1766 case OCTAL_LEGACY: 1767 return Lexer.valueOf(source.getString(start, len), 8); // number 1768 case OCTAL: 1769 return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number 1770 case BINARY_NUMBER: 1771 return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number 1772 case FLOATING: 1773 final String str = source.getString(start, len); 1774 final double value = Double.valueOf(str); 1775 if (str.indexOf('.') != -1) { 1776 return value; //number 1777 } 1778 //anything without an explicit decimal point is still subject to a 1779 //"representable as int or long" check. Then the programmer does not 1780 //explicitly code something as a double. For example new Color(int, int, int) 1781 //and new Color(float, float, float) will get ambiguous for cases like 1782 //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point. 1783 //yet we don't want e.g. 1e6 to be a double unnecessarily 1784 if (JSType.isStrictlyRepresentableAsInt(value)) { 1785 return (int)value; 1786 } else if (JSType.isStrictlyRepresentableAsLong(value)) { 1787 return (long)value; 1788 } 1789 return value; 1790 case STRING: 1791 return source.getString(start, len); // String 1792 case ESCSTRING: 1793 return valueOfString(start, len, strict); // String 1794 case IDENT: 1795 return valueOfIdent(start, len); // String 1796 case REGEX: 1797 return valueOfPattern(start, len); // RegexToken::LexerToken 1798 case TEMPLATE: 1799 case TEMPLATE_HEAD: 1800 case TEMPLATE_MIDDLE: 1801 case TEMPLATE_TAIL: 1802 return valueOfString(start, len, true); // String 1803 case XML: 1804 return valueOfXML(start, len); // XMLToken::LexerToken 1805 case DIRECTIVE_COMMENT: 1806 return source.getString(start, len); 1807 default: 1808 break; 1809 } 1810 1811 return null; 1812 } 1813 1814 /** 1815 * Get the raw string value of a template literal string part. 1816 * 1817 * @param token template string token 1818 * @return raw string 1819 */ 1820 public String valueOfRawString(final long token) { 1821 final int start = Token.descPosition(token); 1822 final int length = Token.descLength(token); 1823 1824 // Save the current position. 1825 final int savePosition = position; 1826 // Calculate the end position. 1827 final int end = start + length; 1828 // Reset to beginning of string. 1829 reset(start); 1830 1831 // Buffer for recording characters. 1832 final StringBuilder sb = new StringBuilder(length); 1833 1834 // Scan until end of string. 1835 while (position < end) { 1836 if (ch0 == '\r') { 1837 // Convert CR-LF or CR to LF line terminator. 1838 sb.append('\n'); 1839 skip(ch1 == '\n' ? 2 : 1); 1840 } else { 1841 // Add regular character. 1842 sb.append(ch0); 1843 skip(1); 1844 } 1845 } 1846 1847 // Restore position. 1848 reset(savePosition); 1849 1850 return sb.toString(); 1851 } 1852 1853 /** 1854 * Get the correctly localized error message for a given message id format arguments 1855 * @param msgId message id 1856 * @param args format arguments 1857 * @return message 1858 */ 1859 protected static String message(final String msgId, final String... args) { 1860 return ECMAErrors.getMessage("lexer.error." + msgId, args); 1861 } 1862 1863 /** 1864 * Generate a runtime exception 1865 * 1866 * @param message error message 1867 * @param type token type 1868 * @param start start position of lexed error 1869 * @param length length of lexed error 1870 * @throws ParserException unconditionally 1871 */ 1872 protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException { 1873 final long token = Token.toDesc(type, start, length); 1874 final int pos = Token.descPosition(token); 1875 final int lineNum = source.getLine(pos); 1876 final int columnNum = source.getColumn(pos); 1877 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token); 1878 throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token); 1879 } 1880 1881 /** 1882 * Helper class for Lexer tokens, e.g XML or RegExp tokens. 1883 * This is the abstract superclass 1884 */ 1885 public static abstract class LexerToken implements Serializable { 1886 private static final long serialVersionUID = 1L; 1887 1888 private final String expression; 1889 1890 /** 1891 * Constructor 1892 * @param expression token expression 1893 */ 1894 protected LexerToken(final String expression) { 1895 this.expression = expression; 1896 } 1897 1898 /** 1899 * Get the expression 1900 * @return expression 1901 */ 1902 public String getExpression() { 1903 return expression; 1904 } 1905 } 1906 1907 /** 1908 * Temporary container for regular expressions. 1909 */ 1910 public static class RegexToken extends LexerToken { 1911 private static final long serialVersionUID = 1L; 1912 1913 /** Options. */ 1914 private final String options; 1915 1916 /** 1917 * Constructor. 1918 * 1919 * @param expression regexp expression 1920 * @param options regexp options 1921 */ 1922 public RegexToken(final String expression, final String options) { 1923 super(expression); 1924 this.options = options; 1925 } 1926 1927 /** 1928 * Get regexp options 1929 * @return options 1930 */ 1931 public String getOptions() { 1932 return options; 1933 } 1934 1935 @Override 1936 public String toString() { 1937 return '/' + getExpression() + '/' + options; 1938 } 1939 } 1940 1941 /** 1942 * Temporary container for XML expression. 1943 */ 1944 public static class XMLToken extends LexerToken { 1945 private static final long serialVersionUID = 1L; 1946 1947 /** 1948 * Constructor. 1949 * 1950 * @param expression XML expression 1951 */ 1952 public XMLToken(final String expression) { 1953 super(expression); 1954 } 1955 } 1956 }