1 /* 2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package jdk.nashorn.internal.parser; 27 28 import static jdk.nashorn.internal.parser.TokenType.ADD; 29 import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER; 30 import static jdk.nashorn.internal.parser.TokenType.COMMENT; 31 import static jdk.nashorn.internal.parser.TokenType.DECIMAL; 32 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT; 33 import static jdk.nashorn.internal.parser.TokenType.EOF; 34 import static jdk.nashorn.internal.parser.TokenType.EOL; 35 import static jdk.nashorn.internal.parser.TokenType.ERROR; 36 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING; 37 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING; 38 import static jdk.nashorn.internal.parser.TokenType.FLOATING; 39 import static jdk.nashorn.internal.parser.TokenType.FUNCTION; 40 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL; 41 import static jdk.nashorn.internal.parser.TokenType.LBRACE; 42 import static jdk.nashorn.internal.parser.TokenType.LPAREN; 43 import static jdk.nashorn.internal.parser.TokenType.OCTAL; 44 import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY; 45 import static jdk.nashorn.internal.parser.TokenType.RBRACE; 46 import static jdk.nashorn.internal.parser.TokenType.REGEX; 47 import static jdk.nashorn.internal.parser.TokenType.RPAREN; 48 import static jdk.nashorn.internal.parser.TokenType.STRING; 49 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE; 50 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD; 51 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE; 52 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL; 53 import static jdk.nashorn.internal.parser.TokenType.XML; 54 55 import java.io.Serializable; 56 57 import jdk.nashorn.internal.runtime.ECMAErrors; 58 import jdk.nashorn.internal.runtime.ErrorManager; 59 import jdk.nashorn.internal.runtime.JSErrorType; 60 import jdk.nashorn.internal.runtime.JSType; 61 import 
jdk.nashorn.internal.runtime.ParserException; 62 import jdk.nashorn.internal.runtime.Source; 63 import jdk.nashorn.internal.runtime.options.Options; 64 65 /** 66 * Responsible for converting source content into a stream of tokens. 67 * 68 */ 69 @SuppressWarnings("fallthrough") 70 public class Lexer extends Scanner { 71 private static final long MIN_INT_L = Integer.MIN_VALUE; 72 private static final long MAX_INT_L = Integer.MAX_VALUE; 73 74 private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals"); 75 76 /** Content source. */ 77 private final Source source; 78 79 /** Buffered stream for tokens. */ 80 private final TokenStream stream; 81 82 /** True if here and edit strings are supported. */ 83 private final boolean scripting; 84 85 /** True if parsing in ECMAScript 6 mode. */ 86 private final boolean es6; 87 88 /** True if a nested scan. (scan to completion, no EOF.) */ 89 private final boolean nested; 90 91 /** Pending new line number and position. */ 92 int pendingLine; 93 94 /** Position of last EOL + 1. */ 95 private int linePosition; 96 97 /** Type of last token added. 
*/ 98 private TokenType last; 99 100 private final boolean pauseOnFunctionBody; 101 private boolean pauseOnNextLeftBrace; 102 103 private int templateExpressionOpenBraces; 104 105 private static final String SPACETAB = " \t"; // ASCII space and tab 106 private static final String LFCR = "\n\r"; // line feed and carriage return (ctrl-m) 107 108 private static final String JAVASCRIPT_WHITESPACE_EOL = 109 LFCR + 110 "\u2028" + // line separator 111 "\u2029" // paragraph separator 112 ; 113 private static final String JAVASCRIPT_WHITESPACE = 114 SPACETAB + 115 JAVASCRIPT_WHITESPACE_EOL + 116 "\u000b" + // tabulation line 117 "\u000c" + // ff (ctrl-l) 118 "\u00a0" + // Latin-1 space 119 "\u1680" + // Ogham space mark 120 "\u180e" + // separator, Mongolian vowel 121 "\u2000" + // en quad 122 "\u2001" + // em quad 123 "\u2002" + // en space 124 "\u2003" + // em space 125 "\u2004" + // three-per-em space 126 "\u2005" + // four-per-em space 127 "\u2006" + // six-per-em space 128 "\u2007" + // figure space 129 "\u2008" + // punctuation space 130 "\u2009" + // thin space 131 "\u200a" + // hair space 132 "\u202f" + // narrow no-break space 133 "\u205f" + // medium mathematical space 134 "\u3000" + // ideographic space 135 "\ufeff" // byte order mark 136 ; 137 138 private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP = 139 "\\u000a" + // line feed 140 "\\u000d" + // carriage return (ctrl-m) 141 "\\u2028" + // line separator 142 "\\u2029" + // paragraph separator 143 "\\u0009" + // tab 144 "\\u0020" + // ASCII space 145 "\\u000b" + // tabulation line 146 "\\u000c" + // ff (ctrl-l) 147 "\\u00a0" + // Latin-1 space 148 "\\u1680" + // Ogham space mark 149 "\\u180e" + // separator, Mongolian vowel 150 "\\u2000" + // en quad 151 "\\u2001" + // em quad 152 "\\u2002" + // en space 153 "\\u2003" + // em space 154 "\\u2004" + // three-per-em space 155 "\\u2005" + // four-per-em space 156 "\\u2006" + // six-per-em space 157 "\\u2007" + // figure space 158 "\\u2008" + // punctuation 
space 159 "\\u2009" + // thin space 160 "\\u200a" + // hair space 161 "\\u202f" + // narrow no-break space 162 "\\u205f" + // medium mathematical space 163 "\\u3000" + // ideographic space 164 "\\ufeff" // byte order mark 165 ; 166 167 static String unicodeEscape(final char ch) { 168 final StringBuilder sb = new StringBuilder(); 169 170 sb.append("\\u"); 171 172 final String hex = Integer.toHexString(ch); 173 for (int i = hex.length(); i < 4; i++) { 174 sb.append('0'); 175 } 176 sb.append(hex); 177 178 return sb.toString(); 179 } 180 181 /** 182 * Constructor 183 * 184 * @param source the source 185 * @param stream the token stream to lex 186 */ 187 public Lexer(final Source source, final TokenStream stream) { 188 this(source, stream, false, false); 189 } 190 191 /** 192 * Constructor 193 * 194 * @param source the source 195 * @param stream the token stream to lex 196 * @param scripting are we in scripting mode 197 * @param es6 are we in ECMAScript 6 mode 198 */ 199 public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) { 200 this(source, 0, source.getLength(), stream, scripting, es6, false); 201 } 202 203 /** 204 * Constructor 205 * 206 * @param source the source 207 * @param start start position in source from which to start lexing 208 * @param len length of source segment to lex 209 * @param stream token stream to lex 210 * @param scripting are we in scripting mode 211 * @param es6 are we in ECMAScript 6 mode 212 * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a 213 * function body. This is used with the feature where the parser is skipping nested function bodies to 214 * avoid reading ahead unnecessarily when we skip the function bodies. 
215 */ 216 public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) { 217 super(source.getContent(), 1, start, len); 218 this.source = source; 219 this.stream = stream; 220 this.scripting = scripting; 221 this.es6 = es6; 222 this.nested = false; 223 this.pendingLine = 1; 224 this.last = EOL; 225 226 this.pauseOnFunctionBody = pauseOnFunctionBody; 227 } 228 229 private Lexer(final Lexer lexer, final State state) { 230 super(lexer, state); 231 232 source = lexer.source; 233 stream = lexer.stream; 234 scripting = lexer.scripting; 235 es6 = lexer.es6; 236 nested = true; 237 238 pendingLine = state.pendingLine; 239 linePosition = state.linePosition; 240 last = EOL; 241 pauseOnFunctionBody = false; 242 } 243 244 static class State extends Scanner.State { 245 /** Pending new line number and position. */ 246 public final int pendingLine; 247 248 /** Position of last EOL + 1. */ 249 public final int linePosition; 250 251 /** Type of last token added. */ 252 public final TokenType last; 253 254 /* 255 * Constructor. 256 */ 257 258 State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) { 259 super(position, limit, line); 260 261 this.pendingLine = pendingLine; 262 this.linePosition = linePosition; 263 this.last = last; 264 } 265 } 266 267 /** 268 * Save the state of the scan. 269 * 270 * @return Captured state. 271 */ 272 @Override 273 State saveState() { 274 return new State(position, limit, line, pendingLine, linePosition, last); 275 } 276 277 /** 278 * Restore the state of the scan. 279 * 280 * @param state 281 * Captured state. 282 */ 283 void restoreState(final State state) { 284 super.restoreState(state); 285 286 pendingLine = state.pendingLine; 287 linePosition = state.linePosition; 288 last = state.last; 289 } 290 291 /** 292 * Add a new token to the stream. 
293 * 294 * @param type 295 * Token type. 296 * @param start 297 * Start position. 298 * @param end 299 * End position. 300 */ 301 protected void add(final TokenType type, final int start, final int end) { 302 // Record last token. 303 last = type; 304 305 // Only emit the last EOL in a cluster. 306 if (type == EOL) { 307 pendingLine = end; 308 linePosition = start; 309 } else { 310 // Write any pending EOL to stream. 311 if (pendingLine != -1) { 312 stream.put(Token.toDesc(EOL, linePosition, pendingLine)); 313 pendingLine = -1; 314 } 315 316 // Write token to stream. 317 stream.put(Token.toDesc(type, start, end - start)); 318 } 319 } 320 321 /** 322 * Add a new token to the stream. 323 * 324 * @param type 325 * Token type. 326 * @param start 327 * Start position. 328 */ 329 protected void add(final TokenType type, final int start) { 330 add(type, start, position); 331 } 332 333 /** 334 * Return the String of valid whitespace characters for regular 335 * expressions in JavaScript 336 * @return regexp whitespace string 337 */ 338 public static String getWhitespaceRegExp() { 339 return JAVASCRIPT_WHITESPACE_IN_REGEXP; 340 } 341 342 /** 343 * Skip end of line. 344 * 345 * @param addEOL true if EOL token should be recorded. 346 */ 347 private void skipEOL(final boolean addEOL) { 348 349 if (ch0 == '\r') { // detect \r\n pattern 350 skip(1); 351 if (ch0 == '\n') { 352 skip(1); 353 } 354 } else { // all other space, ch0 is guaranteed to be EOL or \0 355 skip(1); 356 } 357 358 // bump up line count 359 line++; 360 361 if (addEOL) { 362 // Add an EOL token. 363 add(EOL, position, line); 364 } 365 } 366 367 /** 368 * Skip over rest of line including end of line. 369 * 370 * @param addEOL true if EOL token should be recorded. 371 */ 372 private void skipLine(final boolean addEOL) { 373 // Ignore characters. 374 while (!isEOL(ch0) && !atEOF()) { 375 skip(1); 376 } 377 // Skip over end of line. 
378 skipEOL(addEOL); 379 } 380 381 /** 382 * Test whether a char is valid JavaScript whitespace 383 * @param ch a char 384 * @return true if valid JavaScript whitespace 385 */ 386 public static boolean isJSWhitespace(final char ch) { 387 return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1; 388 } 389 390 /** 391 * Test whether a char is valid JavaScript end of line 392 * @param ch a char 393 * @return true if valid JavaScript end of line 394 */ 395 public static boolean isJSEOL(final char ch) { 396 return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1; 397 } 398 399 /** 400 * Test if char is a string delimiter, e.g. '\' or '"'. 401 * @param ch a char 402 * @return true if string delimiter 403 */ 404 protected boolean isStringDelimiter(final char ch) { 405 return ch == '\'' || ch == '"'; 406 } 407 408 /** 409 * Test if char is a template literal delimiter ('`'). 410 */ 411 private static boolean isTemplateDelimiter(char ch) { 412 return ch == '`'; 413 } 414 415 /** 416 * Test whether a char is valid JavaScript whitespace 417 * @param ch a char 418 * @return true if valid JavaScript whitespace 419 */ 420 protected boolean isWhitespace(final char ch) { 421 return Lexer.isJSWhitespace(ch); 422 } 423 424 /** 425 * Test whether a char is valid JavaScript end of line 426 * @param ch a char 427 * @return true if valid JavaScript end of line 428 */ 429 protected boolean isEOL(final char ch) { 430 return Lexer.isJSEOL(ch); 431 } 432 433 /** 434 * Skip over whitespace and detect end of line, adding EOL tokens if 435 * encountered. 436 * 437 * @param addEOL true if EOL tokens should be recorded. 438 */ 439 private void skipWhitespace(final boolean addEOL) { 440 while (isWhitespace(ch0)) { 441 if (isEOL(ch0)) { 442 skipEOL(addEOL); 443 } else { 444 skip(1); 445 } 446 } 447 } 448 449 /** 450 * Skip over comments. 451 * 452 * @return True if a comment. 453 */ 454 protected boolean skipComments() { 455 // Save the current position. 
456 final int start = position; 457 458 if (ch0 == '/') { 459 // Is it a // comment. 460 if (ch1 == '/') { 461 // Skip over //. 462 skip(2); 463 464 boolean directiveComment = false; 465 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) { 466 directiveComment = true; 467 } 468 469 // Scan for EOL. 470 while (!atEOF() && !isEOL(ch0)) { 471 skip(1); 472 } 473 // Did detect a comment. 474 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start); 475 return true; 476 } else if (ch1 == '*') { 477 // Skip over /*. 478 skip(2); 479 // Scan for */. 480 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) { 481 // If end of line handle else skip character. 482 if (isEOL(ch0)) { 483 skipEOL(true); 484 } else { 485 skip(1); 486 } 487 } 488 489 if (atEOF()) { 490 // TODO - Report closing */ missing in parser. 491 add(ERROR, start); 492 } else { 493 // Skip */. 494 skip(2); 495 } 496 497 // Did detect a comment. 498 add(COMMENT, start); 499 return true; 500 } 501 } else if (ch0 == '#') { 502 assert scripting; 503 // shell style comment 504 // Skip over #. 505 skip(1); 506 // Scan for EOL. 507 while (!atEOF() && !isEOL(ch0)) { 508 skip(1); 509 } 510 // Did detect a comment. 511 add(COMMENT, start); 512 return true; 513 } 514 515 // Not a comment. 516 return false; 517 } 518 519 /** 520 * Convert a regex token to a token object. 521 * 522 * @param start Position in source content. 523 * @param length Length of regex token. 524 * @return Regex token object. 525 */ 526 public RegexToken valueOfPattern(final int start, final int length) { 527 // Save the current position. 528 final int savePosition = position; 529 // Reset to beginning of content. 530 reset(start); 531 // Buffer for recording characters. 532 final StringBuilder sb = new StringBuilder(length); 533 534 // Skip /. 535 skip(1); 536 boolean inBrackets = false; 537 // Scan for closing /, stopping at end of line. 538 while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) { 539 // Skip over escaped character. 
540 if (ch0 == '\\') { 541 sb.append(ch0); 542 sb.append(ch1); 543 skip(2); 544 } else { 545 if (ch0 == '[') { 546 inBrackets = true; 547 } else if (ch0 == ']') { 548 inBrackets = false; 549 } 550 551 // Skip literal character. 552 sb.append(ch0); 553 skip(1); 554 } 555 } 556 557 // Get pattern as string. 558 final String regex = sb.toString(); 559 560 // Skip /. 561 skip(1); 562 563 // Options as string. 564 final String options = source.getString(position, scanIdentifier()); 565 566 reset(savePosition); 567 568 // Compile the pattern. 569 return new RegexToken(regex, options); 570 } 571 572 /** 573 * Return true if the given token can be the beginning of a literal. 574 * 575 * @param token a token 576 * @return true if token can start a literal. 577 */ 578 public boolean canStartLiteral(final TokenType token) { 579 return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<')); 580 } 581 582 /** 583 * interface to receive line information for multi-line literals. 584 */ 585 protected interface LineInfoReceiver { 586 /** 587 * Receives line information 588 * @param line last line number 589 * @param linePosition position of last line 590 */ 591 public void lineInfo(int line, int linePosition); 592 } 593 594 /** 595 * Check whether the given token represents the beginning of a literal. If so scan 596 * the literal and return <tt>true</tt>, otherwise return false. 597 * 598 * @param token the token. 599 * @param startTokenType the token type. 600 * @param lir LineInfoReceiver that receives line info for multi-line string literals. 601 * @return True if a literal beginning with startToken was found and scanned. 602 */ 603 protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) { 604 // Check if it can be a literal. 605 if (!canStartLiteral(startTokenType)) { 606 return false; 607 } 608 // We break on ambiguous tokens so if we already moved on it can't be a literal. 
609 if (stream.get(stream.last()) != token) { 610 return false; 611 } 612 // Rewind to token start position 613 reset(Token.descPosition(token)); 614 615 if (ch0 == '/') { 616 return scanRegEx(); 617 } else if (ch0 == '<') { 618 if (ch1 == '<') { 619 return scanHereString(lir); 620 } else if (Character.isJavaIdentifierStart(ch1)) { 621 return scanXMLLiteral(); 622 } 623 } 624 625 return false; 626 } 627 628 /** 629 * Scan over regex literal. 630 * 631 * @return True if a regex literal. 632 */ 633 private boolean scanRegEx() { 634 assert ch0 == '/'; 635 // Make sure it's not a comment. 636 if (ch1 != '/' && ch1 != '*') { 637 // Record beginning of literal. 638 final int start = position; 639 // Skip /. 640 skip(1); 641 boolean inBrackets = false; 642 643 // Scan for closing /, stopping at end of line. 644 while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) { 645 // Skip over escaped character. 646 if (ch0 == '\\') { 647 skip(1); 648 if (isEOL(ch0)) { 649 reset(start); 650 return false; 651 } 652 skip(1); 653 } else { 654 if (ch0 == '[') { 655 inBrackets = true; 656 } else if (ch0 == ']') { 657 inBrackets = false; 658 } 659 660 // Skip literal character. 661 skip(1); 662 } 663 } 664 665 // If regex literal. 666 if (ch0 == '/') { 667 // Skip /. 668 skip(1); 669 670 // Skip over options. 671 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') { 672 skip(1); 673 } 674 675 // Add regex token. 676 add(REGEX, start); 677 // Regex literal detected. 678 return true; 679 } 680 681 // False start try again. 682 reset(start); 683 } 684 685 // Regex literal not detected. 686 return false; 687 } 688 689 /** 690 * Convert a digit to a integer. Can't use Character.digit since we are 691 * restricted to ASCII by the spec. 692 * 693 * @param ch Character to convert. 694 * @param base Numeric base. 695 * 696 * @return The converted digit or -1 if invalid. 
697 */ 698 protected static int convertDigit(final char ch, final int base) { 699 int digit; 700 701 if ('0' <= ch && ch <= '9') { 702 digit = ch - '0'; 703 } else if ('A' <= ch && ch <= 'Z') { 704 digit = ch - 'A' + 10; 705 } else if ('a' <= ch && ch <= 'z') { 706 digit = ch - 'a' + 10; 707 } else { 708 return -1; 709 } 710 711 return digit < base ? digit : -1; 712 } 713 714 715 /** 716 * Get the value of a hexadecimal numeric sequence. 717 * 718 * @param length Number of digits. 719 * @param type Type of token to report against. 720 * @return Value of sequence or < 0 if no digits. 721 */ 722 private int hexSequence(final int length, final TokenType type) { 723 int value = 0; 724 725 for (int i = 0; i < length; i++) { 726 final int digit = convertDigit(ch0, 16); 727 728 if (digit == -1) { 729 error(Lexer.message("invalid.hex"), type, position, limit); 730 return i == 0 ? -1 : value; 731 } 732 733 value = digit | value << 4; 734 skip(1); 735 } 736 737 return value; 738 } 739 740 /** 741 * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255. 742 * 743 * @return Value of sequence. 744 */ 745 private int octalSequence() { 746 int value = 0; 747 748 for (int i = 0; i < 3; i++) { 749 final int digit = convertDigit(ch0, 8); 750 751 if (digit == -1) { 752 break; 753 } 754 value = digit | value << 3; 755 skip(1); 756 757 if (i == 1 && value >= 32) { 758 break; 759 } 760 } 761 return value; 762 } 763 764 /** 765 * Convert a string to a JavaScript identifier. 766 * 767 * @param start Position in source content. 768 * @param length Length of token. 769 * @return Ident string or null if an error. 770 */ 771 private String valueOfIdent(final int start, final int length) throws RuntimeException { 772 // Save the current position. 773 final int savePosition = position; 774 // End of scan. 775 final int end = start + length; 776 // Reset to beginning of content. 777 reset(start); 778 // Buffer for recording characters. 
779 final StringBuilder sb = new StringBuilder(length); 780 781 // Scan until end of line or end of file. 782 while (!atEOF() && position < end && !isEOL(ch0)) { 783 // If escape character. 784 if (ch0 == '\\' && ch1 == 'u') { 785 skip(2); 786 final int ch = hexSequence(4, TokenType.IDENT); 787 if (isWhitespace((char)ch)) { 788 return null; 789 } 790 if (ch < 0) { 791 sb.append('\\'); 792 sb.append('u'); 793 } else { 794 sb.append((char)ch); 795 } 796 } else { 797 // Add regular character. 798 sb.append(ch0); 799 skip(1); 800 } 801 } 802 803 // Restore position. 804 reset(savePosition); 805 806 return sb.toString(); 807 } 808 809 /** 810 * Scan over and identifier or keyword. Handles identifiers containing 811 * encoded Unicode chars. 812 * 813 * Example: 814 * 815 * var \u0042 = 44; 816 */ 817 private void scanIdentifierOrKeyword() { 818 // Record beginning of identifier. 819 final int start = position; 820 // Scan identifier. 821 final int length = scanIdentifier(); 822 // Check to see if it is a keyword. 823 final TokenType type = TokenLookup.lookupKeyword(content, start, length); 824 if (type == FUNCTION && pauseOnFunctionBody) { 825 pauseOnNextLeftBrace = true; 826 } 827 // Add keyword or identifier token. 828 add(type, start); 829 } 830 831 /** 832 * Convert a string to a JavaScript string object. 833 * 834 * @param start Position in source content. 835 * @param length Length of token. 836 * @return JavaScript string object. 837 */ 838 private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException { 839 // Save the current position. 840 final int savePosition = position; 841 // Calculate the end position. 842 final int end = start + length; 843 // Reset to beginning of string. 844 reset(start); 845 846 // Buffer for recording characters. 847 final StringBuilder sb = new StringBuilder(length); 848 849 // Scan until end of string. 850 while (position < end) { 851 // If escape character. 
852 if (ch0 == '\\') { 853 skip(1); 854 855 final char next = ch0; 856 final int afterSlash = position; 857 858 skip(1); 859 860 // Special characters. 861 switch (next) { 862 case '0': 863 case '1': 864 case '2': 865 case '3': 866 case '4': 867 case '5': 868 case '6': 869 case '7': { 870 if (strict) { 871 // "\0" itself is allowed in strict mode. Only other 'real' 872 // octal escape sequences are not allowed (eg. "\02", "\31"). 873 // See section 7.8.4 String literals production EscapeSequence 874 if (next != '0' || (ch0 >= '0' && ch0 <= '9')) { 875 error(Lexer.message("strict.no.octal"), STRING, position, limit); 876 } 877 } 878 reset(afterSlash); 879 // Octal sequence. 880 final int ch = octalSequence(); 881 882 if (ch < 0) { 883 sb.append('\\'); 884 sb.append('x'); 885 } else { 886 sb.append((char)ch); 887 } 888 break; 889 } 890 case 'n': 891 sb.append('\n'); 892 break; 893 case 't': 894 sb.append('\t'); 895 break; 896 case 'b': 897 sb.append('\b'); 898 break; 899 case 'f': 900 sb.append('\f'); 901 break; 902 case 'r': 903 sb.append('\r'); 904 break; 905 case '\'': 906 sb.append('\''); 907 break; 908 case '\"': 909 sb.append('\"'); 910 break; 911 case '\\': 912 sb.append('\\'); 913 break; 914 case '\r': // CR | CRLF 915 if (ch0 == '\n') { 916 skip(1); 917 } 918 // fall through 919 case '\n': // LF 920 case '\u2028': // LS 921 case '\u2029': // PS 922 // continue on the next line, slash-return continues string 923 // literal 924 break; 925 case 'x': { 926 // Hex sequence. 927 final int ch = hexSequence(2, STRING); 928 929 if (ch < 0) { 930 sb.append('\\'); 931 sb.append('x'); 932 } else { 933 sb.append((char)ch); 934 } 935 } 936 break; 937 case 'u': { 938 // Unicode sequence. 939 final int ch = hexSequence(4, STRING); 940 941 if (ch < 0) { 942 sb.append('\\'); 943 sb.append('u'); 944 } else { 945 sb.append((char)ch); 946 } 947 } 948 break; 949 case 'v': 950 sb.append('\u000B'); 951 break; 952 // All other characters. 
953 default: 954 sb.append(next); 955 break; 956 } 957 } else if (ch0 == '\r') { 958 // Convert CR-LF or CR to LF line terminator. 959 sb.append('\n'); 960 skip(ch1 == '\n' ? 2 : 1); 961 } else { 962 // Add regular character. 963 sb.append(ch0); 964 skip(1); 965 } 966 } 967 968 // Restore position. 969 reset(savePosition); 970 971 return sb.toString(); 972 } 973 974 /** 975 * Scan over a string literal. 976 * @param add true if we are not just scanning but should actually modify the token stream 977 */ 978 protected void scanString(final boolean add) { 979 // Type of string. 980 TokenType type = STRING; 981 // Record starting quote. 982 final char quote = ch0; 983 // Skip over quote. 984 skip(1); 985 986 // Record beginning of string content. 987 final State stringState = saveState(); 988 989 // Scan until close quote or end of line. 990 while (!atEOF() && ch0 != quote && !isEOL(ch0)) { 991 // Skip over escaped character. 992 if (ch0 == '\\') { 993 type = ESCSTRING; 994 skip(1); 995 if (! isEscapeCharacter(ch0)) { 996 error(Lexer.message("invalid.escape.char"), STRING, position, limit); 997 } 998 if (isEOL(ch0)) { 999 // Multiline string literal 1000 skipEOL(false); 1001 continue; 1002 } 1003 } 1004 // Skip literal character. 1005 skip(1); 1006 } 1007 1008 // If close quote. 1009 if (ch0 == quote) { 1010 // Skip close quote. 1011 skip(1); 1012 } else { 1013 error(Lexer.message("missing.close.quote"), STRING, position, limit); 1014 } 1015 1016 // If not just scanning. 1017 if (add) { 1018 // Record end of string. 1019 stringState.setLimit(position - 1); 1020 1021 if (scripting && !stringState.isEmpty()) { 1022 switch (quote) { 1023 case '`': 1024 // Mark the beginning of an exec string. 1025 add(EXECSTRING, stringState.position, stringState.limit); 1026 // Frame edit string with left brace. 1027 add(LBRACE, stringState.position, stringState.position); 1028 // Process edit string. 1029 editString(type, stringState); 1030 // Frame edit string with right brace. 
1031 add(RBRACE, stringState.limit, stringState.limit); 1032 break; 1033 case '"': 1034 // Only edit double quoted strings. 1035 editString(type, stringState); 1036 break; 1037 case '\'': 1038 // Add string token without editing. 1039 add(type, stringState.position, stringState.limit); 1040 break; 1041 default: 1042 break; 1043 } 1044 } else { 1045 /// Add string token without editing. 1046 add(type, stringState.position, stringState.limit); 1047 } 1048 } 1049 } 1050 1051 /** 1052 * Scan over a template string literal. 1053 */ 1054 private void scanTemplate() { 1055 assert ch0 == '`'; 1056 TokenType type = TEMPLATE; 1057 1058 // Skip over quote and record beginning of string content. 1059 skip(1); 1060 State stringState = saveState(); 1061 1062 // Scan until close quote 1063 while (!atEOF()) { 1064 // Skip over escaped character. 1065 if (ch0 == '`') { 1066 skip(1); 1067 // Record end of string. 1068 stringState.setLimit(position - 1); 1069 add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit); 1070 return; 1071 } else if (ch0 == '$' && ch1 == '{') { 1072 skip(2); 1073 stringState.setLimit(position - 2); 1074 add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit); 1075 1076 // scan to RBRACE 1077 Lexer expressionLexer = new Lexer(this, saveState()); 1078 expressionLexer.templateExpressionOpenBraces = 1; 1079 expressionLexer.lexify(); 1080 restoreState(expressionLexer.saveState()); 1081 1082 // scan next middle or tail of the template literal 1083 assert ch0 == '}'; 1084 type = TEMPLATE_MIDDLE; 1085 1086 // Skip over rbrace and record beginning of string content. 
1087 skip(1); 1088 stringState = saveState(); 1089 1090 continue; 1091 } else if (ch0 == '\\') { 1092 skip(1); 1093 // EscapeSequence 1094 if (!isEscapeCharacter(ch0)) { 1095 error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit); 1096 } 1097 if (isEOL(ch0)) { 1098 // LineContinuation 1099 skipEOL(false); 1100 continue; 1101 } 1102 } else if (isEOL(ch0)) { 1103 // LineTerminatorSequence 1104 skipEOL(false); 1105 continue; 1106 } 1107 1108 // Skip literal character. 1109 skip(1); 1110 } 1111 1112 error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit); 1113 } 1114 1115 /** 1116 * Is the given character a valid escape char after "\" ? 1117 * 1118 * @param ch character to be checked 1119 * @return if the given character is valid after "\" 1120 */ 1121 protected boolean isEscapeCharacter(final char ch) { 1122 return true; 1123 } 1124 1125 /** 1126 * Convert string to number. 1127 * 1128 * @param valueString String to convert. 1129 * @param radix Numeric base. 1130 * @return Converted number. 1131 */ 1132 private static Number valueOf(final String valueString, final int radix) throws NumberFormatException { 1133 try { 1134 final long value = Long.parseLong(valueString, radix); 1135 if(value >= MIN_INT_L && value <= MAX_INT_L) { 1136 return (int)value; 1137 } 1138 return value; 1139 } catch (final NumberFormatException e) { 1140 if (radix == 10) { 1141 return Double.valueOf(valueString); 1142 } 1143 1144 double value = 0.0; 1145 1146 for (int i = 0; i < valueString.length(); i++) { 1147 final char ch = valueString.charAt(i); 1148 // Preverified, should always be a valid digit. 1149 final int digit = convertDigit(ch, radix); 1150 value *= radix; 1151 value += digit; 1152 } 1153 1154 return value; 1155 } 1156 } 1157 1158 /** 1159 * Scan a number. 1160 */ 1161 protected void scanNumber() { 1162 // Record beginning of number. 1163 final int start = position; 1164 // Assume value is a decimal. 
1165 TokenType type = DECIMAL; 1166 1167 // First digit of number. 1168 int digit = convertDigit(ch0, 10); 1169 1170 // If number begins with 0x. 1171 if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) { 1172 // Skip over 0xN. 1173 skip(3); 1174 // Skip over remaining digits. 1175 while (convertDigit(ch0, 16) != -1) { 1176 skip(1); 1177 } 1178 1179 type = HEXADECIMAL; 1180 } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) { 1181 // Skip over 0oN. 1182 skip(3); 1183 // Skip over remaining digits. 1184 while (convertDigit(ch0, 8) != -1) { 1185 skip(1); 1186 } 1187 1188 type = OCTAL; 1189 } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) { 1190 // Skip over 0bN. 1191 skip(3); 1192 // Skip over remaining digits. 1193 while (convertDigit(ch0, 2) != -1) { 1194 skip(1); 1195 } 1196 1197 type = BINARY_NUMBER; 1198 } else { 1199 // Check for possible octal constant. 1200 boolean octal = digit == 0; 1201 // Skip first digit if not leading '.'. 1202 if (digit != -1) { 1203 skip(1); 1204 } 1205 1206 // Skip remaining digits. 1207 while ((digit = convertDigit(ch0, 10)) != -1) { 1208 // Check octal only digits. 1209 octal = octal && digit < 8; 1210 // Skip digit. 1211 skip(1); 1212 } 1213 1214 if (octal && position - start > 1) { 1215 type = OCTAL_LEGACY; 1216 } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') { 1217 // Must be a double. 1218 if (ch0 == '.') { 1219 // Skip period. 1220 skip(1); 1221 // Skip mantissa. 1222 while (convertDigit(ch0, 10) != -1) { 1223 skip(1); 1224 } 1225 } 1226 1227 // Detect exponent. 1228 if (ch0 == 'E' || ch0 == 'e') { 1229 // Skip E. 1230 skip(1); 1231 // Detect and skip exponent sign. 1232 if (ch0 == '+' || ch0 == '-') { 1233 skip(1); 1234 } 1235 // Skip exponent. 
1236 while (convertDigit(ch0, 10) != -1) { 1237 skip(1); 1238 } 1239 } 1240 1241 type = FLOATING; 1242 } 1243 } 1244 1245 if (Character.isJavaIdentifierStart(ch0)) { 1246 error(Lexer.message("missing.space.after.number"), type, position, 1); 1247 } 1248 1249 // Add number token. 1250 add(type, start); 1251 } 1252 1253 /** 1254 * Convert a regex token to a token object. 1255 * 1256 * @param start Position in source content. 1257 * @param length Length of regex token. 1258 * @return Regex token object. 1259 */ 1260 XMLToken valueOfXML(final int start, final int length) { 1261 return new XMLToken(source.getString(start, length)); 1262 } 1263 1264 /** 1265 * Scan over a XML token. 1266 * 1267 * @return TRUE if is an XML literal. 1268 */ 1269 private boolean scanXMLLiteral() { 1270 assert ch0 == '<' && Character.isJavaIdentifierStart(ch1); 1271 if (XML_LITERALS) { 1272 // Record beginning of xml expression. 1273 final int start = position; 1274 1275 int openCount = 0; 1276 1277 do { 1278 if (ch0 == '<') { 1279 if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) { 1280 skip(3); 1281 openCount--; 1282 } else if (Character.isJavaIdentifierStart(ch1)) { 1283 skip(2); 1284 openCount++; 1285 } else if (ch1 == '?') { 1286 skip(2); 1287 } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') { 1288 skip(4); 1289 } else { 1290 reset(start); 1291 return false; 1292 } 1293 1294 while (!atEOF() && ch0 != '>') { 1295 if (ch0 == '/' && ch1 == '>') { 1296 openCount--; 1297 skip(1); 1298 break; 1299 } else if (ch0 == '\"' || ch0 == '\'') { 1300 scanString(false); 1301 } else { 1302 skip(1); 1303 } 1304 } 1305 1306 if (ch0 != '>') { 1307 reset(start); 1308 return false; 1309 } 1310 1311 skip(1); 1312 } else if (atEOF()) { 1313 reset(start); 1314 return false; 1315 } else { 1316 skip(1); 1317 } 1318 } while (openCount > 0); 1319 1320 add(XML, start); 1321 return true; 1322 } 1323 1324 return false; 1325 } 1326 1327 /** 1328 * Scan over identifier characters. 
1329 * 1330 * @return Length of identifier or zero if none found. 1331 */ 1332 private int scanIdentifier() { 1333 final int start = position; 1334 1335 // Make sure first character is valid start character. 1336 if (ch0 == '\\' && ch1 == 'u') { 1337 skip(2); 1338 final int ch = hexSequence(4, TokenType.IDENT); 1339 1340 if (!Character.isJavaIdentifierStart(ch)) { 1341 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1342 } 1343 } else if (!Character.isJavaIdentifierStart(ch0)) { 1344 // Not an identifier. 1345 return 0; 1346 } 1347 1348 // Make sure remaining characters are valid part characters. 1349 while (!atEOF()) { 1350 if (ch0 == '\\' && ch1 == 'u') { 1351 skip(2); 1352 final int ch = hexSequence(4, TokenType.IDENT); 1353 1354 if (!Character.isJavaIdentifierPart(ch)) { 1355 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1356 } 1357 } else if (Character.isJavaIdentifierPart(ch0)) { 1358 skip(1); 1359 } else { 1360 break; 1361 } 1362 } 1363 1364 // Length of identifier sequence. 1365 return position - start; 1366 } 1367 1368 /** 1369 * Compare two identifiers (in content) for equality. 1370 * 1371 * @param aStart Start of first identifier. 1372 * @param aLength Length of first identifier. 1373 * @param bStart Start of second identifier. 1374 * @param bLength Length of second identifier. 1375 * @return True if equal. 1376 */ 1377 private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) { 1378 if (aLength == bLength) { 1379 for (int i = 0; i < aLength; i++) { 1380 if (content[aStart + i] != content[bStart + i]) { 1381 return false; 1382 } 1383 } 1384 1385 return true; 1386 } 1387 1388 return false; 1389 } 1390 1391 /** 1392 * Detect if a line starts with a marker identifier. 1393 * 1394 * @param identStart Start of identifier. 1395 * @param identLength Length of identifier. 1396 * @return True if detected. 
1397 */ 1398 private boolean hasHereMarker(final int identStart, final int identLength) { 1399 // Skip any whitespace. 1400 skipWhitespace(false); 1401 1402 return identifierEqual(identStart, identLength, position, scanIdentifier()); 1403 } 1404 1405 /** 1406 * Lexer to service edit strings. 1407 */ 1408 private static class EditStringLexer extends Lexer { 1409 /** Type of string literals to emit. */ 1410 final TokenType stringType; 1411 1412 /* 1413 * Constructor. 1414 */ 1415 1416 EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) { 1417 super(lexer, stringState); 1418 1419 this.stringType = stringType; 1420 } 1421 1422 /** 1423 * Lexify the contents of the string. 1424 */ 1425 @Override 1426 public void lexify() { 1427 // Record start of string position. 1428 int stringStart = position; 1429 // Indicate that the priming first string has not been emitted. 1430 boolean primed = false; 1431 1432 while (true) { 1433 // Detect end of content. 1434 if (atEOF()) { 1435 break; 1436 } 1437 1438 // Honour escapes (should be well formed.) 1439 if (ch0 == '\\' && stringType == ESCSTRING) { 1440 skip(2); 1441 1442 continue; 1443 } 1444 1445 // If start of expression. 1446 if (ch0 == '$' && ch1 == '{') { 1447 if (!primed || stringStart != position) { 1448 if (primed) { 1449 add(ADD, stringStart, stringStart + 1); 1450 } 1451 1452 add(stringType, stringStart, position); 1453 primed = true; 1454 } 1455 1456 // Skip ${ 1457 skip(2); 1458 1459 // Save expression state. 1460 final State expressionState = saveState(); 1461 1462 // Start with one open brace. 1463 int braceCount = 1; 1464 1465 // Scan for the rest of the string. 1466 while (!atEOF()) { 1467 // If closing brace. 1468 if (ch0 == '}') { 1469 // Break only only if matching brace. 1470 if (--braceCount == 0) { 1471 break; 1472 } 1473 } else if (ch0 == '{') { 1474 // Bump up the brace count. 1475 braceCount++; 1476 } 1477 1478 // Skip to next character. 
1479 skip(1); 1480 } 1481 1482 // If braces don't match then report an error. 1483 if (braceCount != 0) { 1484 error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1); 1485 } 1486 1487 // Mark end of expression. 1488 expressionState.setLimit(position); 1489 // Skip closing brace. 1490 skip(1); 1491 1492 // Start next string. 1493 stringStart = position; 1494 1495 // Concatenate expression. 1496 add(ADD, expressionState.position, expressionState.position + 1); 1497 add(LPAREN, expressionState.position, expressionState.position + 1); 1498 1499 // Scan expression. 1500 final Lexer lexer = new Lexer(this, expressionState); 1501 lexer.lexify(); 1502 1503 // Close out expression parenthesis. 1504 add(RPAREN, position - 1, position); 1505 1506 continue; 1507 } 1508 1509 // Next character in string. 1510 skip(1); 1511 } 1512 1513 // If there is any unemitted string portion. 1514 if (stringStart != limit) { 1515 // Concatenate remaining string. 1516 if (primed) { 1517 add(ADD, stringStart, 1); 1518 } 1519 1520 add(stringType, stringStart, limit); 1521 } 1522 } 1523 1524 } 1525 1526 /** 1527 * Edit string for nested expressions. 1528 * 1529 * @param stringType Type of string literals to emit. 1530 * @param stringState State of lexer at start of string. 1531 */ 1532 private void editString(final TokenType stringType, final State stringState) { 1533 // Use special lexer to scan string. 1534 final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState); 1535 lexer.lexify(); 1536 1537 // Need to keep lexer informed. 1538 last = stringType; 1539 } 1540 1541 /** 1542 * Scan over a here string. 1543 * 1544 * @return TRUE if is a here string. 1545 */ 1546 private boolean scanHereString(final LineInfoReceiver lir) { 1547 assert ch0 == '<' && ch1 == '<'; 1548 if (scripting) { 1549 // Record beginning of here string. 
1550 final State saved = saveState(); 1551 1552 // << or <<< 1553 final boolean excludeLastEOL = ch2 != '<'; 1554 1555 if (excludeLastEOL) { 1556 skip(2); 1557 } else { 1558 skip(3); 1559 } 1560 1561 // Scan identifier. It might be quoted, indicating that no string editing should take place. 1562 final char quoteChar = ch0; 1563 final boolean noStringEditing = quoteChar == '"' || quoteChar == '\''; 1564 if (noStringEditing) { 1565 skip(1); 1566 } 1567 final int identStart = position; 1568 final int identLength = scanIdentifier(); 1569 if (noStringEditing) { 1570 if (ch0 != quoteChar) { 1571 error(Lexer.message("here.non.matching.delimiter"), last, position, position); 1572 restoreState(saved); 1573 return false; 1574 } 1575 skip(1); 1576 } 1577 1578 // Check for identifier. 1579 if (identLength == 0) { 1580 // Treat as shift. 1581 restoreState(saved); 1582 1583 return false; 1584 } 1585 1586 // Record rest of line. 1587 final State restState = saveState(); 1588 // keep line number updated 1589 int lastLine = line; 1590 1591 skipLine(false); 1592 lastLine++; 1593 int lastLinePosition = position; 1594 restState.setLimit(position); 1595 1596 // Record beginning of string. 1597 final State stringState = saveState(); 1598 int stringEnd = position; 1599 1600 // Hunt down marker. 1601 while (!atEOF()) { 1602 // Skip any whitespace. 1603 skipWhitespace(false); 1604 1605 if (hasHereMarker(identStart, identLength)) { 1606 break; 1607 } 1608 1609 skipLine(false); 1610 lastLine++; 1611 lastLinePosition = position; 1612 stringEnd = position; 1613 } 1614 1615 // notify last line information 1616 lir.lineInfo(lastLine, lastLinePosition); 1617 1618 // Record end of string. 1619 stringState.setLimit(stringEnd); 1620 1621 // If marker is missing. 
1622 if (stringState.isEmpty() || atEOF()) { 1623 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position); 1624 restoreState(saved); 1625 1626 return false; 1627 } 1628 1629 // Remove last end of line if specified. 1630 if (excludeLastEOL) { 1631 // Handles \n. 1632 if (content[stringEnd - 1] == '\n') { 1633 stringEnd--; 1634 } 1635 1636 // Handles \r and \r\n. 1637 if (content[stringEnd - 1] == '\r') { 1638 stringEnd--; 1639 } 1640 1641 // Update end of string. 1642 stringState.setLimit(stringEnd); 1643 } 1644 1645 // Edit string if appropriate. 1646 if (!noStringEditing && !stringState.isEmpty()) { 1647 editString(STRING, stringState); 1648 } else { 1649 // Add here string. 1650 add(STRING, stringState.position, stringState.limit); 1651 } 1652 1653 // Scan rest of original line. 1654 final Lexer restLexer = new Lexer(this, restState); 1655 1656 restLexer.lexify(); 1657 1658 return true; 1659 } 1660 1661 return false; 1662 } 1663 1664 /** 1665 * Breaks source content down into lex units, adding tokens to the token 1666 * stream. The routine scans until the stream buffer is full. Can be called 1667 * repeatedly until EOF is detected. 1668 */ 1669 public void lexify() { 1670 while (!stream.isFull() || nested) { 1671 // Skip over whitespace. 1672 skipWhitespace(true); 1673 1674 // Detect end of file. 1675 if (atEOF()) { 1676 if (!nested) { 1677 // Add an EOF token at the end. 1678 add(EOF, position); 1679 } 1680 1681 break; 1682 } 1683 1684 // Check for comments. Note that we don't scan for regexp and other literals here as 1685 // we may not have enough context to distinguish them from similar looking operators. 1686 // Instead we break on ambiguous operators below and let the parser decide. 1687 if (ch0 == '/' && skipComments()) { 1688 continue; 1689 } 1690 1691 if (scripting && ch0 == '#' && skipComments()) { 1692 continue; 1693 } 1694 1695 // TokenType for lookup of delimiter or operator. 
1696 TokenType type; 1697 1698 if (ch0 == '.' && convertDigit(ch1, 10) != -1) { 1699 // '.' followed by digit. 1700 // Scan and add a number. 1701 scanNumber(); 1702 } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) { 1703 if (templateExpressionOpenBraces > 0) { 1704 if (type == LBRACE) { 1705 templateExpressionOpenBraces++; 1706 } else if (type == RBRACE) { 1707 if (--templateExpressionOpenBraces == 0) { 1708 break; 1709 } 1710 } 1711 } 1712 1713 // Get the number of characters in the token. 1714 final int typeLength = type.getLength(); 1715 // Skip that many characters. 1716 skip(typeLength); 1717 // Add operator token. 1718 add(type, position - typeLength); 1719 // Some operator tokens also mark the beginning of regexp, XML, or here string literals. 1720 // We break to let the parser decide what it is. 1721 if (canStartLiteral(type)) { 1722 break; 1723 } else if (type == LBRACE && pauseOnNextLeftBrace) { 1724 pauseOnNextLeftBrace = false; 1725 break; 1726 } 1727 } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') { 1728 // Scan and add identifier or keyword. 1729 scanIdentifierOrKeyword(); 1730 } else if (isStringDelimiter(ch0)) { 1731 // Scan and add a string. 1732 scanString(true); 1733 } else if (Character.isDigit(ch0)) { 1734 // Scan and add a number. 1735 scanNumber(); 1736 } else if (isTemplateDelimiter(ch0) && es6) { 1737 // Scan and add template in ES6 mode. 1738 scanTemplate(); 1739 } else if (isTemplateDelimiter(ch0) && scripting) { 1740 // Scan and add an exec string ('`') in scripting mode. 1741 scanString(true); 1742 } else { 1743 // Don't recognize this character. 1744 skip(1); 1745 add(ERROR, position - 1); 1746 } 1747 } 1748 } 1749 1750 /** 1751 * Return value of token given its token descriptor. 1752 * 1753 * @param token Token descriptor. 1754 * @return JavaScript value. 
1755 */ 1756 Object getValueOf(final long token, final boolean strict) { 1757 final int start = Token.descPosition(token); 1758 final int len = Token.descLength(token); 1759 1760 switch (Token.descType(token)) { 1761 case DECIMAL: 1762 return Lexer.valueOf(source.getString(start, len), 10); // number 1763 case HEXADECIMAL: 1764 return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number 1765 case OCTAL_LEGACY: 1766 return Lexer.valueOf(source.getString(start, len), 8); // number 1767 case OCTAL: 1768 return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number 1769 case BINARY_NUMBER: 1770 return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number 1771 case FLOATING: 1772 final String str = source.getString(start, len); 1773 final double value = Double.valueOf(str); 1774 if (str.indexOf('.') != -1) { 1775 return value; //number 1776 } 1777 //anything without an explicit decimal point is still subject to a 1778 //"representable as int or long" check. Then the programmer does not 1779 //explicitly code something as a double. For example new Color(int, int, int) 1780 //and new Color(float, float, float) will get ambiguous for cases like 1781 //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point. 1782 //yet we don't want e.g. 
1e6 to be a double unnecessarily 1783 if (JSType.isStrictlyRepresentableAsInt(value)) { 1784 return (int)value; 1785 } else if (JSType.isStrictlyRepresentableAsLong(value)) { 1786 return (long)value; 1787 } 1788 return value; 1789 case STRING: 1790 return source.getString(start, len); // String 1791 case ESCSTRING: 1792 return valueOfString(start, len, strict); // String 1793 case IDENT: 1794 return valueOfIdent(start, len); // String 1795 case REGEX: 1796 return valueOfPattern(start, len); // RegexToken::LexerToken 1797 case TEMPLATE: 1798 case TEMPLATE_HEAD: 1799 case TEMPLATE_MIDDLE: 1800 case TEMPLATE_TAIL: 1801 return valueOfString(start, len, true); // String 1802 case XML: 1803 return valueOfXML(start, len); // XMLToken::LexerToken 1804 case DIRECTIVE_COMMENT: 1805 return source.getString(start, len); 1806 default: 1807 break; 1808 } 1809 1810 return null; 1811 } 1812 1813 /** 1814 * Get the raw string value of a template literal string part. 1815 * 1816 * @param token template string token 1817 * @return raw string 1818 */ 1819 public String valueOfRawString(final long token) { 1820 final int start = Token.descPosition(token); 1821 final int length = Token.descLength(token); 1822 1823 // Save the current position. 1824 final int savePosition = position; 1825 // Calculate the end position. 1826 final int end = start + length; 1827 // Reset to beginning of string. 1828 reset(start); 1829 1830 // Buffer for recording characters. 1831 final StringBuilder sb = new StringBuilder(length); 1832 1833 // Scan until end of string. 1834 while (position < end) { 1835 if (ch0 == '\r') { 1836 // Convert CR-LF or CR to LF line terminator. 1837 sb.append('\n'); 1838 skip(ch1 == '\n' ? 2 : 1); 1839 } else { 1840 // Add regular character. 1841 sb.append(ch0); 1842 skip(1); 1843 } 1844 } 1845 1846 // Restore position. 
1847 reset(savePosition); 1848 1849 return sb.toString(); 1850 } 1851 1852 /** 1853 * Get the correctly localized error message for a given message id format arguments 1854 * @param msgId message id 1855 * @param args format arguments 1856 * @return message 1857 */ 1858 protected static String message(final String msgId, final String... args) { 1859 return ECMAErrors.getMessage("lexer.error." + msgId, args); 1860 } 1861 1862 /** 1863 * Generate a runtime exception 1864 * 1865 * @param message error message 1866 * @param type token type 1867 * @param start start position of lexed error 1868 * @param length length of lexed error 1869 * @throws ParserException unconditionally 1870 */ 1871 protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException { 1872 final long token = Token.toDesc(type, start, length); 1873 final int pos = Token.descPosition(token); 1874 final int lineNum = source.getLine(pos); 1875 final int columnNum = source.getColumn(pos); 1876 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token); 1877 throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token); 1878 } 1879 1880 /** 1881 * Helper class for Lexer tokens, e.g XML or RegExp tokens. 1882 * This is the abstract superclass 1883 */ 1884 public static abstract class LexerToken implements Serializable { 1885 private static final long serialVersionUID = 1L; 1886 1887 private final String expression; 1888 1889 /** 1890 * Constructor 1891 * @param expression token expression 1892 */ 1893 protected LexerToken(final String expression) { 1894 this.expression = expression; 1895 } 1896 1897 /** 1898 * Get the expression 1899 * @return expression 1900 */ 1901 public String getExpression() { 1902 return expression; 1903 } 1904 } 1905 1906 /** 1907 * Temporary container for regular expressions. 
*/
    public static class RegexToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /** Regexp options. */
        private final String options;

        /**
         * Create a regexp token from its expression and its options.
         *
         * @param expression regexp expression
         * @param options    regexp options
         */
        public RegexToken(final String expression, final String options) {
            super(expression);
            this.options = options;
        }

        /**
         * Get regexp options
         * @return options
         */
        public String getOptions() {
            return options;
        }

        /**
         * Render the token as the expression between slashes, followed by the options.
         */
        @Override
        public String toString() {
            return "/" + getExpression() + "/" + options;
        }
    }

    /**
     * Temporary container for XML expression.
     */
    public static class XMLToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /**
         * Create an XML token from its expression text.
         *
         * @param expression XML expression
         */
        public XMLToken(final String expression) {
            super(expression);
        }
    }
}