1 /* 2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package jdk.nashorn.internal.parser; 27 28 import static jdk.nashorn.internal.parser.TokenType.ADD; 29 import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER; 30 import static jdk.nashorn.internal.parser.TokenType.COMMENT; 31 import static jdk.nashorn.internal.parser.TokenType.DECIMAL; 32 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT; 33 import static jdk.nashorn.internal.parser.TokenType.EOF; 34 import static jdk.nashorn.internal.parser.TokenType.EOL; 35 import static jdk.nashorn.internal.parser.TokenType.ERROR; 36 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING; 37 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING; 38 import static jdk.nashorn.internal.parser.TokenType.FLOATING; 39 import static jdk.nashorn.internal.parser.TokenType.FUNCTION; 40 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL; 41 import static jdk.nashorn.internal.parser.TokenType.LBRACE; 42 import static jdk.nashorn.internal.parser.TokenType.LPAREN; 43 import static jdk.nashorn.internal.parser.TokenType.OCTAL; 44 import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY; 45 import static jdk.nashorn.internal.parser.TokenType.RBRACE; 46 import static jdk.nashorn.internal.parser.TokenType.REGEX; 47 import static jdk.nashorn.internal.parser.TokenType.RPAREN; 48 import static jdk.nashorn.internal.parser.TokenType.STRING; 49 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE; 50 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD; 51 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE; 52 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL; 53 import static jdk.nashorn.internal.parser.TokenType.XML; 54 55 import java.io.Serializable; 56 57 import jdk.nashorn.internal.runtime.ECMAErrors; 58 import jdk.nashorn.internal.runtime.ErrorManager; 59 import jdk.nashorn.internal.runtime.JSErrorType; 60 import jdk.nashorn.internal.runtime.JSType; 61 import 
jdk.nashorn.internal.runtime.ParserException; 62 import jdk.nashorn.internal.runtime.Source; 63 import jdk.nashorn.internal.runtime.options.Options; 64 65 /** 66 * Responsible for converting source content into a stream of tokens. 67 * 68 */ 69 @SuppressWarnings("fallthrough") 70 public class Lexer extends Scanner { 71 private static final long MIN_INT_L = Integer.MIN_VALUE; 72 private static final long MAX_INT_L = Integer.MAX_VALUE; 73 74 private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals"); 75 76 /** Content source. */ 77 private final Source source; 78 79 /** Buffered stream for tokens. */ 80 private final TokenStream stream; 81 82 /** True if here and edit strings are supported. */ 83 private final boolean scripting; 84 85 /** True if parsing in ECMAScript 6 mode. */ 86 private final boolean es6; 87 88 /** True if a nested scan. (scan to completion, no EOF.) */ 89 private final boolean nested; 90 91 /** Pending new line number and position. */ 92 int pendingLine; 93 94 /** Position of last EOL + 1. */ 95 private int linePosition; 96 97 /** Type of last token added. 
*/ 98 private TokenType last; 99 100 private final boolean pauseOnFunctionBody; 101 private boolean pauseOnNextLeftBrace; 102 103 private int templateExpressionOpenBraces; 104 105 private static final String JAVASCRIPT_OTHER_WHITESPACE = 106 "\u2028" + // line separator 107 "\u2029" + // paragraph separator 108 "\u00a0" + // Latin-1 space 109 "\u1680" + // Ogham space mark 110 "\u180e" + // separator, Mongolian vowel 111 "\u2000" + // en quad 112 "\u2001" + // em quad 113 "\u2002" + // en space 114 "\u2003" + // em space 115 "\u2004" + // three-per-em space 116 "\u2005" + // four-per-em space 117 "\u2006" + // six-per-em space 118 "\u2007" + // figure space 119 "\u2008" + // punctuation space 120 "\u2009" + // thin space 121 "\u200a" + // hair space 122 "\u202f" + // narrow no-break space 123 "\u205f" + // medium mathematical space 124 "\u3000" + // ideographic space 125 "\ufeff" // byte order mark 126 ; 127 128 private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP = 129 "\\u000a" + // line feed 130 "\\u000d" + // carriage return (ctrl-m) 131 "\\u2028" + // line separator 132 "\\u2029" + // paragraph separator 133 "\\u0009" + // tab 134 "\\u0020" + // ASCII space 135 "\\u000b" + // tabulation line 136 "\\u000c" + // ff (ctrl-l) 137 "\\u00a0" + // Latin-1 space 138 "\\u1680" + // Ogham space mark 139 "\\u180e" + // separator, Mongolian vowel 140 "\\u2000" + // en quad 141 "\\u2001" + // em quad 142 "\\u2002" + // en space 143 "\\u2003" + // em space 144 "\\u2004" + // three-per-em space 145 "\\u2005" + // four-per-em space 146 "\\u2006" + // six-per-em space 147 "\\u2007" + // figure space 148 "\\u2008" + // punctuation space 149 "\\u2009" + // thin space 150 "\\u200a" + // hair space 151 "\\u202f" + // narrow no-break space 152 "\\u205f" + // medium mathematical space 153 "\\u3000" + // ideographic space 154 "\\ufeff" // byte order mark 155 ; 156 157 static String unicodeEscape(final char ch) { 158 final StringBuilder sb = new StringBuilder(); 159 160 
sb.append("\\u"); 161 162 final String hex = Integer.toHexString(ch); 163 for (int i = hex.length(); i < 4; i++) { 164 sb.append('0'); 165 } 166 sb.append(hex); 167 168 return sb.toString(); 169 } 170 171 /** 172 * Constructor 173 * 174 * @param source the source 175 * @param stream the token stream to lex 176 */ 177 public Lexer(final Source source, final TokenStream stream) { 178 this(source, stream, false, false); 179 } 180 181 /** 182 * Constructor 183 * 184 * @param source the source 185 * @param stream the token stream to lex 186 * @param scripting are we in scripting mode 187 * @param es6 are we in ECMAScript 6 mode 188 */ 189 public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) { 190 this(source, 0, source.getLength(), stream, scripting, es6, false); 191 } 192 193 /** 194 * Constructor 195 * 196 * @param source the source 197 * @param start start position in source from which to start lexing 198 * @param len length of source segment to lex 199 * @param stream token stream to lex 200 * @param scripting are we in scripting mode 201 * @param es6 are we in ECMAScript 6 mode 202 * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a 203 * function body. This is used with the feature where the parser is skipping nested function bodies to 204 * avoid reading ahead unnecessarily when we skip the function bodies. 
205 */ 206 public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) { 207 super(source.getContent(), 1, start, len); 208 this.source = source; 209 this.stream = stream; 210 this.scripting = scripting; 211 this.es6 = es6; 212 this.nested = false; 213 this.pendingLine = 1; 214 this.last = EOL; 215 216 this.pauseOnFunctionBody = pauseOnFunctionBody; 217 } 218 219 private Lexer(final Lexer lexer, final State state) { 220 super(lexer, state); 221 222 source = lexer.source; 223 stream = lexer.stream; 224 scripting = lexer.scripting; 225 es6 = lexer.es6; 226 nested = true; 227 228 pendingLine = state.pendingLine; 229 linePosition = state.linePosition; 230 last = EOL; 231 pauseOnFunctionBody = false; 232 } 233 234 static class State extends Scanner.State { 235 /** Pending new line number and position. */ 236 public final int pendingLine; 237 238 /** Position of last EOL + 1. */ 239 public final int linePosition; 240 241 /** Type of last token added. */ 242 public final TokenType last; 243 244 /* 245 * Constructor. 246 */ 247 248 State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) { 249 super(position, limit, line); 250 251 this.pendingLine = pendingLine; 252 this.linePosition = linePosition; 253 this.last = last; 254 } 255 } 256 257 /** 258 * Save the state of the scan. 259 * 260 * @return Captured state. 261 */ 262 @Override 263 State saveState() { 264 return new State(position, limit, line, pendingLine, linePosition, last); 265 } 266 267 /** 268 * Restore the state of the scan. 269 * 270 * @param state 271 * Captured state. 272 */ 273 void restoreState(final State state) { 274 super.restoreState(state); 275 276 pendingLine = state.pendingLine; 277 linePosition = state.linePosition; 278 last = state.last; 279 } 280 281 /** 282 * Add a new token to the stream. 
283 * 284 * @param type 285 * Token type. 286 * @param start 287 * Start position. 288 * @param end 289 * End position. 290 */ 291 protected void add(final TokenType type, final int start, final int end) { 292 // Record last token. 293 last = type; 294 295 // Only emit the last EOL in a cluster. 296 if (type == EOL) { 297 pendingLine = end; 298 linePosition = start; 299 } else { 300 // Write any pending EOL to stream. 301 if (pendingLine != -1) { 302 stream.put(Token.toDesc(EOL, linePosition, pendingLine)); 303 pendingLine = -1; 304 } 305 306 // Write token to stream. 307 stream.put(Token.toDesc(type, start, end - start)); 308 } 309 } 310 311 /** 312 * Add a new token to the stream. 313 * 314 * @param type 315 * Token type. 316 * @param start 317 * Start position. 318 */ 319 protected void add(final TokenType type, final int start) { 320 add(type, start, position); 321 } 322 323 /** 324 * Return the String of valid whitespace characters for regular 325 * expressions in JavaScript 326 * @return regexp whitespace string 327 */ 328 public static String getWhitespaceRegExp() { 329 return JAVASCRIPT_WHITESPACE_IN_REGEXP; 330 } 331 332 /** 333 * Skip end of line. 334 * 335 * @param addEOL true if EOL token should be recorded. 336 */ 337 private void skipEOL(final boolean addEOL) { 338 339 if (ch0 == '\r') { // detect \r\n pattern 340 skip(1); 341 if (ch0 == '\n') { 342 skip(1); 343 } 344 } else { // all other space, ch0 is guaranteed to be EOL or \0 345 skip(1); 346 } 347 348 // bump up line count 349 line++; 350 351 if (addEOL) { 352 // Add an EOL token. 353 add(EOL, position, line); 354 } 355 } 356 357 /** 358 * Skip over rest of line including end of line. 359 * 360 * @param addEOL true if EOL token should be recorded. 361 */ 362 private void skipLine(final boolean addEOL) { 363 // Ignore characters. 364 while (!isEOL(ch0) && !atEOF()) { 365 skip(1); 366 } 367 // Skip over end of line. 
368 skipEOL(addEOL); 369 } 370 371 /** 372 * Test whether a char is valid JavaScript whitespace 373 * @param ch a char 374 * @return true if valid JavaScript whitespace 375 */ 376 public static boolean isJSWhitespace(final char ch) { 377 return ch == ' ' // space 378 || ch >= '\t' && ch <= '\r' // 0x09..0x0d: tab, line feed, tabulation line, ff, carriage return 379 || ch >= 160 && isOtherJSWhitespace(ch); 380 } 381 382 private static boolean isOtherJSWhitespace(final char ch) { 383 return JAVASCRIPT_OTHER_WHITESPACE.indexOf(ch) != -1; 384 } 385 386 /** 387 * Test whether a char is valid JavaScript end of line 388 * @param ch a char 389 * @return true if valid JavaScript end of line 390 */ 391 public static boolean isJSEOL(final char ch) { 392 return ch == '\n' // line feed 393 || ch == '\r' // carriage return (ctrl-m) 394 || ch == '\u2028' // line separator 395 || ch == '\u2029'; // paragraph separator 396 } 397 398 /** 399 * Test if char is a string delimiter, e.g. '\' or '"'. 400 * @param ch a char 401 * @return true if string delimiter 402 */ 403 protected boolean isStringDelimiter(final char ch) { 404 return ch == '\'' || ch == '"'; 405 } 406 407 /** 408 * Test if char is a template literal delimiter ('`'). 409 */ 410 private static boolean isTemplateDelimiter(final char ch) { 411 return ch == '`'; 412 } 413 414 /** 415 * Test whether a char is valid JavaScript whitespace 416 * @param ch a char 417 * @return true if valid JavaScript whitespace 418 */ 419 protected boolean isWhitespace(final char ch) { 420 return Lexer.isJSWhitespace(ch); 421 } 422 423 /** 424 * Test whether a char is valid JavaScript end of line 425 * @param ch a char 426 * @return true if valid JavaScript end of line 427 */ 428 protected boolean isEOL(final char ch) { 429 return Lexer.isJSEOL(ch); 430 } 431 432 /** 433 * Skip over whitespace and detect end of line, adding EOL tokens if 434 * encountered. 435 * 436 * @param addEOL true if EOL tokens should be recorded. 
437 */ 438 private void skipWhitespace(final boolean addEOL) { 439 while (isWhitespace(ch0)) { 440 if (isEOL(ch0)) { 441 skipEOL(addEOL); 442 } else { 443 skip(1); 444 } 445 } 446 } 447 448 /** 449 * Skip over comments. 450 * 451 * @return True if a comment. 452 */ 453 protected boolean skipComments() { 454 // Save the current position. 455 final int start = position; 456 457 if (ch0 == '/') { 458 // Is it a // comment. 459 if (ch1 == '/') { 460 // Skip over //. 461 skip(2); 462 463 boolean directiveComment = false; 464 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) { 465 directiveComment = true; 466 } 467 468 // Scan for EOL. 469 while (!atEOF() && !isEOL(ch0)) { 470 skip(1); 471 } 472 // Did detect a comment. 473 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start); 474 return true; 475 } else if (ch1 == '*') { 476 // Skip over /*. 477 skip(2); 478 // Scan for */. 479 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) { 480 // If end of line handle else skip character. 481 if (isEOL(ch0)) { 482 skipEOL(true); 483 } else { 484 skip(1); 485 } 486 } 487 488 if (atEOF()) { 489 // TODO - Report closing */ missing in parser. 490 add(ERROR, start); 491 } else { 492 // Skip */. 493 skip(2); 494 } 495 496 // Did detect a comment. 497 add(COMMENT, start); 498 return true; 499 } 500 } else if (ch0 == '#') { 501 assert scripting; 502 // shell style comment 503 // Skip over #. 504 skip(1); 505 // Scan for EOL. 506 while (!atEOF() && !isEOL(ch0)) { 507 skip(1); 508 } 509 // Did detect a comment. 510 add(COMMENT, start); 511 return true; 512 } 513 514 // Not a comment. 515 return false; 516 } 517 518 /** 519 * Convert a regex token to a token object. 520 * 521 * @param start Position in source content. 522 * @param length Length of regex token. 523 * @return Regex token object. 524 */ 525 public RegexToken valueOfPattern(final int start, final int length) { 526 // Save the current position. 527 final int savePosition = position; 528 // Reset to beginning of content. 
529 reset(start); 530 // Buffer for recording characters. 531 final StringBuilder sb = new StringBuilder(length); 532 533 // Skip /. 534 skip(1); 535 boolean inBrackets = false; 536 // Scan for closing /, stopping at end of line. 537 while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) { 538 // Skip over escaped character. 539 if (ch0 == '\\') { 540 sb.append(ch0); 541 sb.append(ch1); 542 skip(2); 543 } else { 544 if (ch0 == '[') { 545 inBrackets = true; 546 } else if (ch0 == ']') { 547 inBrackets = false; 548 } 549 550 // Skip literal character. 551 sb.append(ch0); 552 skip(1); 553 } 554 } 555 556 // Get pattern as string. 557 final String regex = sb.toString(); 558 559 // Skip /. 560 skip(1); 561 562 // Options as string. 563 final String options = source.getString(position, scanIdentifier()); 564 565 reset(savePosition); 566 567 // Compile the pattern. 568 return new RegexToken(regex, options); 569 } 570 571 /** 572 * Return true if the given token can be the beginning of a literal. 573 * 574 * @param token a token 575 * @return true if token can start a literal. 576 */ 577 public boolean canStartLiteral(final TokenType token) { 578 return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<')); 579 } 580 581 /** 582 * interface to receive line information for multi-line literals. 583 */ 584 protected interface LineInfoReceiver { 585 /** 586 * Receives line information 587 * @param line last line number 588 * @param linePosition position of last line 589 */ 590 public void lineInfo(int line, int linePosition); 591 } 592 593 /** 594 * Check whether the given token represents the beginning of a literal. If so scan 595 * the literal and return <code>true</code>, otherwise return false. 596 * 597 * @param token the token. 598 * @param startTokenType the token type. 599 * @param lir LineInfoReceiver that receives line info for multi-line string literals. 600 * @return True if a literal beginning with startToken was found and scanned. 
601 */ 602 protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) { 603 // Check if it can be a literal. 604 if (!canStartLiteral(startTokenType)) { 605 return false; 606 } 607 // We break on ambiguous tokens so if we already moved on it can't be a literal. 608 if (stream.get(stream.last()) != token) { 609 return false; 610 } 611 612 // Record current position in case multiple heredocs start on this line - see JDK-8073653 613 final State state = saveState(); 614 // Rewind to token start position 615 reset(Token.descPosition(token)); 616 617 if (ch0 == '/') { 618 return scanRegEx(); 619 } else if (ch0 == '<') { 620 if (ch1 == '<') { 621 return scanHereString(lir, state); 622 } else if (Character.isJavaIdentifierStart(ch1)) { 623 return scanXMLLiteral(); 624 } 625 } 626 627 return false; 628 } 629 630 /** 631 * Scan over regex literal. 632 * 633 * @return True if a regex literal. 634 */ 635 private boolean scanRegEx() { 636 assert ch0 == '/'; 637 // Make sure it's not a comment. 638 if (ch1 != '/' && ch1 != '*') { 639 // Record beginning of literal. 640 final int start = position; 641 // Skip /. 642 skip(1); 643 boolean inBrackets = false; 644 645 // Scan for closing /, stopping at end of line. 646 while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) { 647 // Skip over escaped character. 648 if (ch0 == '\\') { 649 skip(1); 650 if (isEOL(ch0)) { 651 reset(start); 652 return false; 653 } 654 skip(1); 655 } else { 656 if (ch0 == '[') { 657 inBrackets = true; 658 } else if (ch0 == ']') { 659 inBrackets = false; 660 } 661 662 // Skip literal character. 663 skip(1); 664 } 665 } 666 667 // If regex literal. 668 if (ch0 == '/') { 669 // Skip /. 670 skip(1); 671 672 // Skip over options. 673 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') { 674 skip(1); 675 } 676 677 // Add regex token. 678 add(REGEX, start); 679 // Regex literal detected. 
680 return true; 681 } 682 683 // False start try again. 684 reset(start); 685 } 686 687 // Regex literal not detected. 688 return false; 689 } 690 691 /** 692 * Convert a digit to a integer. Can't use Character.digit since we are 693 * restricted to ASCII by the spec. 694 * 695 * @param ch Character to convert. 696 * @param base Numeric base. 697 * 698 * @return The converted digit or -1 if invalid. 699 */ 700 protected static int convertDigit(final char ch, final int base) { 701 int digit; 702 703 if ('0' <= ch && ch <= '9') { 704 digit = ch - '0'; 705 } else if ('A' <= ch && ch <= 'Z') { 706 digit = ch - 'A' + 10; 707 } else if ('a' <= ch && ch <= 'z') { 708 digit = ch - 'a' + 10; 709 } else { 710 return -1; 711 } 712 713 return digit < base ? digit : -1; 714 } 715 716 717 /** 718 * Get the value of a hexadecimal numeric sequence. 719 * 720 * @param length Number of digits. 721 * @param type Type of token to report against. 722 * @return Value of sequence or < 0 if no digits. 723 */ 724 private int hexSequence(final int length, final TokenType type) { 725 int value = 0; 726 727 for (int i = 0; i < length; i++) { 728 final int digit = convertDigit(ch0, 16); 729 730 if (digit == -1) { 731 error(Lexer.message("invalid.hex"), type, position, limit); 732 return i == 0 ? -1 : value; 733 } 734 735 value = digit | value << 4; 736 skip(1); 737 } 738 739 return value; 740 } 741 742 /** 743 * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255. 744 * 745 * @return Value of sequence. 746 */ 747 private int octalSequence() { 748 int value = 0; 749 750 for (int i = 0; i < 3; i++) { 751 final int digit = convertDigit(ch0, 8); 752 753 if (digit == -1) { 754 break; 755 } 756 value = digit | value << 3; 757 skip(1); 758 759 if (i == 1 && value >= 32) { 760 break; 761 } 762 } 763 return value; 764 } 765 766 /** 767 * Convert a string to a JavaScript identifier. 768 * 769 * @param start Position in source content. 
* @param length Length of token.
     * @return Ident string, with backslash-u escape sequences replaced by the
     *         characters they denote.
     */
    private String valueOfIdent(final int start, final int length) throws RuntimeException {
        // Save the current position.
        final int savePosition = position;
        // End of scan.
        final int end = start + length;
        // Reset to beginning of content.
        reset(start);
        // Buffer for recording characters.
        final StringBuilder sb = new StringBuilder(length);

        // Scan until end of line or end of file.
        while (!atEOF() && position < end && !isEOL(ch0)) {
            // If escape character.
            if (ch0 == '\\' && ch1 == 'u') {
                skip(2);
                // Decode the four hex digits that follow the escape prefix.
                final int ch = hexSequence(4, TokenType.IDENT);
                assert ! isWhitespace((char)ch);
                assert ch >= 0;
                sb.append((char)ch);
            } else {
                // Add regular character.
                sb.append(ch0);
                skip(1);
            }
        }

        // Restore position.
        reset(savePosition);

        return sb.toString();
    }

    /**
     * Scan over and identifier or keyword. Handles identifiers containing
     * encoded Unicode chars.
     *
     * Example:
     *
     * var \u0042 = 44;
     *
     * The token type comes from the keyword lookup. Seeing the {@code function}
     * keyword while pausing on function bodies is enabled arms the pause on the
     * next left brace.
     */
    private void scanIdentifierOrKeyword() {
        // Record beginning of identifier.
        final int start = position;
        // Scan identifier.
        final int length = scanIdentifier();
        // Check to see if it is a keyword.
        final TokenType type = TokenLookup.lookupKeyword(content, start, length);
        if (type == FUNCTION && pauseOnFunctionBody) {
            pauseOnNextLeftBrace = true;
        }
        // Add keyword or identifier token.
        add(type, start);
    }

    /**
     * Convert a string to a JavaScript string object.
     *
     * Decodes the escape sequences found between {@code start} and
     * {@code start + length}, drops line continuations (a backslash followed by a
     * line terminator), and turns a raw CR or CR-LF into a single LF. The scan
     * position is restored before returning.
     *
     * @param start  Position in source content.
     * @param length Length of token.
     * @param strict True if octal escape sequences other than a lone "\0" should be
     *               reported as errors.
     * @return JavaScript string object.
     */
    private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
        // Save the current position.
        final int savePosition = position;
        // Calculate the end position.
        final int end = start + length;
        // Reset to beginning of string.
        reset(start);

        // Buffer for recording characters.
        final StringBuilder sb = new StringBuilder(length);

        // Scan until end of string.
        while (position < end) {
            // If escape character.
            if (ch0 == '\\') {
                skip(1);

                // The character after the backslash, and its position so that the
                // octal case can rewind onto it.
                final char next = ch0;
                final int afterSlash = position;

                skip(1);

                // Special characters.
                switch (next) {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7': {
                    if (strict) {
                        // "\0" itself is allowed in strict mode. Only other 'real'
                        // octal escape sequences are not allowed (eg. "\02", "\31").
                        // See section 7.8.4 String literals production EscapeSequence
                        if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
                            error(Lexer.message("strict.no.octal"), STRING, position, limit);
                        }
                    }
                    // Rewind onto the first octal digit so octalSequence() sees all of them.
                    reset(afterSlash);
                    // Octal sequence.
                    final int ch = octalSequence();

                    // NOTE(review): octalSequence() only ORs digits into a value that starts
                    // at zero, so the negative branch looks unreachable - confirm before removing.
                    if (ch < 0) {
                        sb.append('\\');
                        sb.append('x');
                    } else {
                        sb.append((char)ch);
                    }
                    break;
                }
                case 'n':
                    sb.append('\n');
                    break;
                case 't':
                    sb.append('\t');
                    break;
                case 'b':
                    sb.append('\b');
                    break;
                case 'f':
                    sb.append('\f');
                    break;
                case 'r':
                    sb.append('\r');
                    break;
                case '\'':
                    sb.append('\'');
                    break;
                case '\"':
                    sb.append('\"');
                    break;
                case '\\':
                    sb.append('\\');
                    break;
                case '\r': // CR | CRLF
                    if (ch0 == '\n') {
                        skip(1);
                    }
                    // fall through
                case '\n': // LF
                case '\u2028': // LS
                case '\u2029': // PS
                    // continue on the next line, slash-return continues string
                    // literal
                    break;
                case 'x': {
                    // Hex sequence.
                    final int ch = hexSequence(2, STRING);

                    // A negative result means no digit was read: keep the escape as text.
                    if (ch < 0) {
                        sb.append('\\');
                        sb.append('x');
                    } else {
                        sb.append((char)ch);
                    }
                }
                    break;
                case 'u': {
                    // Unicode sequence.
                    final int ch = hexSequence(4, STRING);

                    // A negative result means no digit was read: keep the escape as text.
                    if (ch < 0) {
                        sb.append('\\');
                        sb.append('u');
                    } else {
                        sb.append((char)ch);
                    }
                }
                    break;
                case 'v':
                    sb.append('\u000B');
                    break;
                // All other characters.
                default:
                    // An unrecognized escape stands for the escaped character itself.
                    sb.append(next);
                    break;
                }
            } else if (ch0 == '\r') {
                // Convert CR-LF or CR to LF line terminator.
                sb.append('\n');
                skip(ch1 == '\n' ? 2 : 1);
            } else {
                // Add regular character.
                sb.append(ch0);
                skip(1);
            }
        }

        // Restore position.
        reset(savePosition);

        return sb.toString();
    }

    /**
     * Scan over a string literal.
     *
     * The token type is STRING, or ESCSTRING once a backslash has been seen. In
     * scripting mode a non-empty backtick string becomes an EXECSTRING token followed
     * by its edited content framed in braces, and a non-empty double quoted string is
     * edited; everything else is added as a plain string token.
     *
     * @param add true if we are not just scanning but should actually modify the token stream
     */
    protected void scanString(final boolean add) {
        // Type of string.
        TokenType type = STRING;
        // Record starting quote.
        final char quote = ch0;
        // Skip over quote.
        skip(1);

        // Record beginning of string content.
        final State stringState = saveState();

        // Scan until close quote or end of line.
        while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
            // Skip over escaped character.
            if (ch0 == '\\') {
                type = ESCSTRING;
                skip(1);
                if (isEOL(ch0)) {
                    // Multiline string literal
                    skipEOL(false);
                    continue;
                }
            }
            // Skip literal character.
            skip(1);
        }

        // If close quote.
        if (ch0 == quote) {
            // Skip close quote.
            skip(1);
        } else {
            error(Lexer.message("missing.close.quote"), STRING, position, limit);
        }

        // If not just scanning.
        if (add) {
            // Record end of string (the position just before the close quote).
            stringState.setLimit(position - 1);

            if (scripting && !stringState.isEmpty()) {
                switch (quote) {
                case '`':
                    // Mark the beginning of an exec string.
                    add(EXECSTRING, stringState.position, stringState.limit);
                    // Frame edit string with left brace.
                    add(LBRACE, stringState.position, stringState.position);
                    // Process edit string.
                    editString(type, stringState);
                    // Frame edit string with right brace.
                    add(RBRACE, stringState.limit, stringState.limit);
                    break;
                case '"':
                    // Only edit double quoted strings.
                    editString(type, stringState);
                    break;
                case '\'':
                    // Add string token without editing.
                    add(type, stringState.position, stringState.limit);
                    break;
                default:
                    break;
                }
            } else {
                /// Add string token without editing.
                add(type, stringState.position, stringState.limit);
            }
        }
    }

    /**
     * Scan over a template string literal.
     *
     * A literal without substitutions is added as one TEMPLATE token. Otherwise the
     * text pieces are added as TEMPLATE_HEAD, TEMPLATE_MIDDLE and TEMPLATE_TAIL
     * tokens, and each substitution is lexed by a nested lexer that shares this
     * lexer's token stream.
     */
    private void scanTemplate() {
        assert ch0 == '`';
        TokenType type = TEMPLATE;

        // Skip over quote and record beginning of string content.
        skip(1);
        State stringState = saveState();

        // Scan until close quote
        while (!atEOF()) {
            // Closing backtick: emit the final piece and stop.
            if (ch0 == '`') {
                skip(1);
                // Record end of string.
                stringState.setLimit(position - 1);
                add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit);
                return;
            } else if (ch0 == '$' && ch1 == '{') {
                // Start of a substitution: emit the text piece that precedes it.
                skip(2);
                stringState.setLimit(position - 2);
                add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit);

                // scan to RBRACE
                final Lexer expressionLexer = new Lexer(this, saveState());
                expressionLexer.templateExpressionOpenBraces = 1;
                expressionLexer.lexify();
                // Continue from wherever the nested lexer stopped.
                restoreState(expressionLexer.saveState());

                // scan next middle or tail of the template literal
                assert ch0 == '}';
                type = TEMPLATE_MIDDLE;

                // Skip over rbrace and record beginning of string content.
                skip(1);
                stringState = saveState();

                continue;
            } else if (ch0 == '\\') {
                skip(1);
                // EscapeSequence
                if (isEOL(ch0)) {
                    // LineContinuation
                    skipEOL(false);
                    continue;
                }
            } else if (isEOL(ch0)) {
                // LineTerminatorSequence
                skipEOL(false);
                continue;
            }

            // Skip literal character.
            skip(1);
        }

        // Ran into the end of input before the closing backtick.
        error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
    }

    /**
     * Convert string to number.
     *
     * Returns an Integer when the text parses as an int in the given radix. Otherwise
     * returns a Double: parsed directly for radix 10, or accumulated digit by digit
     * for any other radix.
     *
     * @param valueString  String to convert.
     * @param radix        Numeric base.
     * @return Converted number.
     */
    private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
        try {
            return Integer.parseInt(valueString, radix);
        } catch (final NumberFormatException e) {
            // Not an int (e.g. out of range or a floating point literal): fall back to double.
            if (radix == 10) {
                return Double.valueOf(valueString);
            }

            double value = 0.0;

            for (int i = 0; i < valueString.length(); i++) {
                final char ch = valueString.charAt(i);
                // Preverified, should always be a valid digit.
                final int digit = convertDigit(ch, radix);
                value *= radix;
                value += digit;
            }

            return value;
        }
    }

    /**
     * Scan a number.
     *
     * Classifies the literal as HEXADECIMAL (0x prefix), OCTAL (0o prefix, ES6 mode
     * only), BINARY_NUMBER (0b prefix, ES6 mode only), OCTAL_LEGACY (a leading zero
     * followed only by digits below 8), FLOATING (has a fraction or an exponent) or
     * DECIMAL, and adds the corresponding token. An identifier start character
     * directly after the number is reported as an error.
     */
    protected void scanNumber() {
        // Record beginning of number.
        final int start = position;
        // Assume value is a decimal.
        TokenType type = DECIMAL;

        // First digit of number (-1 when the number starts with a '.').
        int digit = convertDigit(ch0, 10);

        // If number begins with 0x.
        if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
            // Skip over 0xN.
            skip(3);
            // Skip over remaining digits.
            while (convertDigit(ch0, 16) != -1) {
                skip(1);
            }

            type = HEXADECIMAL;
        } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) {
            // Skip over 0oN.
            skip(3);
            // Skip over remaining digits.
            while (convertDigit(ch0, 8) != -1) {
                skip(1);
            }

            type = OCTAL;
        } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) {
            // Skip over 0bN.
            skip(3);
            // Skip over remaining digits.
            while (convertDigit(ch0, 2) != -1) {
                skip(1);
            }

            type = BINARY_NUMBER;
        } else {
            // Check for possible octal constant.
            boolean octal = digit == 0;
            // Skip first digit if not leading '.'.
            if (digit != -1) {
                skip(1);
            }

            // Skip remaining digits.
            while ((digit = convertDigit(ch0, 10)) != -1) {
                // Check octal only digits.
                octal = octal && digit < 8;
                // Skip digit.
                skip(1);
            }

            // A lone "0" is a decimal; legacy octal needs at least one more digit.
            if (octal && position - start > 1) {
                type = OCTAL_LEGACY;
            } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
                // Must be a double.
                if (ch0 == '.') {
                    // Skip period.
                    skip(1);
                    // Skip mantissa.
                    while (convertDigit(ch0, 10) != -1) {
                        skip(1);
                    }
                }

                // Detect exponent.
                if (ch0 == 'E' || ch0 == 'e') {
                    // Skip E.
                    skip(1);
                    // Detect and skip exponent sign.
                    if (ch0 == '+' || ch0 == '-') {
                        skip(1);
                    }
                    // Skip exponent.
                    while (convertDigit(ch0, 10) != -1) {
                        skip(1);
                    }
                }

                type = FLOATING;
            }
        }

        // A number may not run straight into an identifier.
        if (Character.isJavaIdentifierStart(ch0)) {
            error(Lexer.message("missing.space.after.number"), type, position, 1);
        }

        // Add number token.
        add(type, start);
    }

    /**
     * Convert an XML token to a token object.
     *
     * @param start  Position in source content.
     * @param length Length of XML token.
     * @return XML token object wrapping the source text of the token.
     */
    XMLToken valueOfXML(final int start, final int length) {
        return new XMLToken(source.getString(start, length));
    }

    /**
     * Scan over a XML token.
     *
     * Only active when XML literals are enabled. Tracks the nesting depth of open
     * elements and adds a single XML token once the depth is back at zero. On
     * malformed input the scan position is put back at the start and nothing is added.
     *
     * @return TRUE if is an XML literal.
     */
    private boolean scanXMLLiteral() {
        assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
        if (XML_LITERALS) {
            // Record beginning of xml expression.
            final int start = position;

            // Number of elements opened but not yet closed.
            int openCount = 0;

            do {
                if (ch0 == '<') {
                    if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
                        // Closing tag.
                        skip(3);
                        openCount--;
                    } else if (Character.isJavaIdentifierStart(ch1)) {
                        // Opening tag.
                        skip(2);
                        openCount++;
                    } else if (ch1 == '?') {
                        // Processing instruction.
                        skip(2);
                    } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
                        // Comment opener.
                        skip(4);
                    } else {
                        reset(start);
                        return false;
                    }

                    // Scan to the end of the tag.
                    while (!atEOF() && ch0 != '>') {
                        if (ch0 == '/' && ch1 == '>') {
                            // Self-closing tag: undo the open counted above.
                            openCount--;
                            skip(1);
                            break;
                        } else if (ch0 == '\"' || ch0 == '\'') {
                            // Attribute value: scan it as a string without adding a token.
                            scanString(false);
                        } else {
                            skip(1);
                        }
                    }

                    if (ch0 != '>') {
                        reset(start);
                        return false;
                    }

                    skip(1);
                } else if (atEOF()) {
                    reset(start);
                    return false;
                } else {
                    // Text between tags.
                    skip(1);
                }
            } while (openCount > 0);

            add(XML, start);
            return true;
        }

        return false;
    }

    /**
     * Scan over identifier characters.
1305 * 1306 * @return Length of identifier or zero if none found. 1307 */ 1308 private int scanIdentifier() { 1309 final int start = position; 1310 1311 // Make sure first character is valid start character. 1312 if (ch0 == '\\' && ch1 == 'u') { 1313 skip(2); 1314 final int ch = hexSequence(4, TokenType.IDENT); 1315 1316 if (!Character.isJavaIdentifierStart(ch)) { 1317 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1318 } 1319 } else if (!Character.isJavaIdentifierStart(ch0)) { 1320 // Not an identifier. 1321 return 0; 1322 } 1323 1324 // Make sure remaining characters are valid part characters. 1325 while (!atEOF()) { 1326 if (ch0 == '\\' && ch1 == 'u') { 1327 skip(2); 1328 final int ch = hexSequence(4, TokenType.IDENT); 1329 1330 if (!Character.isJavaIdentifierPart(ch)) { 1331 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1332 } 1333 } else if (Character.isJavaIdentifierPart(ch0)) { 1334 skip(1); 1335 } else { 1336 break; 1337 } 1338 } 1339 1340 // Length of identifier sequence. 1341 return position - start; 1342 } 1343 1344 /** 1345 * Compare two identifiers (in content) for equality. 1346 * 1347 * @param aStart Start of first identifier. 1348 * @param aLength Length of first identifier. 1349 * @param bStart Start of second identifier. 1350 * @param bLength Length of second identifier. 1351 * @return True if equal. 1352 */ 1353 private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) { 1354 if (aLength == bLength) { 1355 for (int i = 0; i < aLength; i++) { 1356 if (content[aStart + i] != content[bStart + i]) { 1357 return false; 1358 } 1359 } 1360 1361 return true; 1362 } 1363 1364 return false; 1365 } 1366 1367 /** 1368 * Detect if a line starts with a marker identifier. 1369 * 1370 * @param identStart Start of identifier. 1371 * @param identLength Length of identifier. 1372 * @return True if detected. 
1373 */ 1374 private boolean hasHereMarker(final int identStart, final int identLength) { 1375 // Skip any whitespace. 1376 skipWhitespace(false); 1377 1378 return identifierEqual(identStart, identLength, position, scanIdentifier()); 1379 } 1380 1381 /** 1382 * Lexer to service edit strings. 1383 */ 1384 private static class EditStringLexer extends Lexer { 1385 /** Type of string literals to emit. */ 1386 final TokenType stringType; 1387 1388 /* 1389 * Constructor. 1390 */ 1391 1392 EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) { 1393 super(lexer, stringState); 1394 1395 this.stringType = stringType; 1396 } 1397 1398 /** 1399 * Lexify the contents of the string. 1400 */ 1401 @Override 1402 public void lexify() { 1403 // Record start of string position. 1404 int stringStart = position; 1405 // Indicate that the priming first string has not been emitted. 1406 boolean primed = false; 1407 1408 while (true) { 1409 // Detect end of content. 1410 if (atEOF()) { 1411 break; 1412 } 1413 1414 // Honour escapes (should be well formed.) 1415 if (ch0 == '\\' && stringType == ESCSTRING) { 1416 skip(2); 1417 1418 continue; 1419 } 1420 1421 // If start of expression. 1422 if (ch0 == '$' && ch1 == '{') { 1423 if (!primed || stringStart != position) { 1424 if (primed) { 1425 add(ADD, stringStart, stringStart + 1); 1426 } 1427 1428 add(stringType, stringStart, position); 1429 primed = true; 1430 } 1431 1432 // Skip ${ 1433 skip(2); 1434 1435 // Save expression state. 1436 final State expressionState = saveState(); 1437 1438 // Start with one open brace. 1439 int braceCount = 1; 1440 1441 // Scan for the rest of the string. 1442 while (!atEOF()) { 1443 // If closing brace. 1444 if (ch0 == '}') { 1445 // Break only only if matching brace. 1446 if (--braceCount == 0) { 1447 break; 1448 } 1449 } else if (ch0 == '{') { 1450 // Bump up the brace count. 1451 braceCount++; 1452 } 1453 1454 // Skip to next character. 
1455 skip(1); 1456 } 1457 1458 // If braces don't match then report an error. 1459 if (braceCount != 0) { 1460 error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1); 1461 } 1462 1463 // Mark end of expression. 1464 expressionState.setLimit(position); 1465 // Skip closing brace. 1466 skip(1); 1467 1468 // Start next string. 1469 stringStart = position; 1470 1471 // Concatenate expression. 1472 add(ADD, expressionState.position, expressionState.position + 1); 1473 add(LPAREN, expressionState.position, expressionState.position + 1); 1474 1475 // Scan expression. 1476 final Lexer lexer = new Lexer(this, expressionState); 1477 lexer.lexify(); 1478 1479 // Close out expression parenthesis. 1480 add(RPAREN, position - 1, position); 1481 1482 continue; 1483 } 1484 1485 // Next character in string. 1486 skip(1); 1487 } 1488 1489 // If there is any unemitted string portion. 1490 if (stringStart != limit) { 1491 // Concatenate remaining string. 1492 if (primed) { 1493 add(ADD, stringStart, 1); 1494 } 1495 1496 add(stringType, stringStart, limit); 1497 } 1498 } 1499 1500 } 1501 1502 /** 1503 * Edit string for nested expressions. 1504 * 1505 * @param stringType Type of string literals to emit. 1506 * @param stringState State of lexer at start of string. 1507 */ 1508 private void editString(final TokenType stringType, final State stringState) { 1509 // Use special lexer to scan string. 1510 final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState); 1511 lexer.lexify(); 1512 1513 // Need to keep lexer informed. 1514 last = stringType; 1515 } 1516 1517 /** 1518 * Scan over a here string. 1519 * 1520 * @return TRUE if is a here string. 1521 */ 1522 private boolean scanHereString(final LineInfoReceiver lir, final State oldState) { 1523 assert ch0 == '<' && ch1 == '<'; 1524 if (scripting) { 1525 // Record beginning of here string. 
1526 final State saved = saveState(); 1527 1528 // << or <<< 1529 final boolean excludeLastEOL = ch2 != '<'; 1530 1531 if (excludeLastEOL) { 1532 skip(2); 1533 } else { 1534 skip(3); 1535 } 1536 1537 // Scan identifier. It might be quoted, indicating that no string editing should take place. 1538 final char quoteChar = ch0; 1539 final boolean noStringEditing = quoteChar == '"' || quoteChar == '\''; 1540 if (noStringEditing) { 1541 skip(1); 1542 } 1543 final int identStart = position; 1544 final int identLength = scanIdentifier(); 1545 if (noStringEditing) { 1546 if (ch0 != quoteChar) { 1547 error(Lexer.message("here.non.matching.delimiter"), last, position, position); 1548 restoreState(saved); 1549 return false; 1550 } 1551 skip(1); 1552 } 1553 1554 // Check for identifier. 1555 if (identLength == 0) { 1556 // Treat as shift. 1557 restoreState(saved); 1558 1559 return false; 1560 } 1561 1562 // Record rest of line. 1563 final State restState = saveState(); 1564 // keep line number updated 1565 int lastLine = line; 1566 1567 skipLine(false); 1568 lastLine++; 1569 int lastLinePosition = position; 1570 restState.setLimit(position); 1571 1572 if (oldState.position > position) { 1573 restoreState(oldState); 1574 skipLine(false); 1575 } 1576 1577 // Record beginning of string. 1578 final State stringState = saveState(); 1579 int stringEnd = position; 1580 1581 // Hunt down marker. 1582 while (!atEOF()) { 1583 // Skip any whitespace. 1584 skipWhitespace(false); 1585 1586 //handle trailing blank lines 1587 lastLinePosition = position; 1588 stringEnd = position; 1589 1590 if (hasHereMarker(identStart, identLength)) { 1591 break; 1592 } 1593 1594 skipLine(false); 1595 lastLine++; 1596 lastLinePosition = position; 1597 stringEnd = position; 1598 } 1599 1600 // notify last line information 1601 lir.lineInfo(lastLine, lastLinePosition); 1602 1603 // Record end of string. 1604 stringState.setLimit(stringEnd); 1605 1606 // If marker is missing. 
1607 if (stringState.isEmpty() || atEOF()) { 1608 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position); 1609 restoreState(saved); 1610 1611 return false; 1612 } 1613 1614 // Remove last end of line if specified. 1615 if (excludeLastEOL) { 1616 // Handles \n. 1617 if (content[stringEnd - 1] == '\n') { 1618 stringEnd--; 1619 } 1620 1621 // Handles \r and \r\n. 1622 if (content[stringEnd - 1] == '\r') { 1623 stringEnd--; 1624 } 1625 1626 // Update end of string. 1627 stringState.setLimit(stringEnd); 1628 } 1629 1630 // Edit string if appropriate. 1631 if (!noStringEditing && !stringState.isEmpty()) { 1632 editString(STRING, stringState); 1633 } else { 1634 // Add here string. 1635 add(STRING, stringState.position, stringState.limit); 1636 } 1637 1638 // Scan rest of original line. 1639 final Lexer restLexer = new Lexer(this, restState); 1640 1641 restLexer.lexify(); 1642 1643 return true; 1644 } 1645 1646 return false; 1647 } 1648 1649 /** 1650 * Breaks source content down into lex units, adding tokens to the token 1651 * stream. The routine scans until the stream buffer is full. Can be called 1652 * repeatedly until EOF is detected. 1653 */ 1654 public void lexify() { 1655 while (!stream.isFull() || nested) { 1656 // Skip over whitespace. 1657 skipWhitespace(true); 1658 1659 // Detect end of file. 1660 if (atEOF()) { 1661 if (!nested) { 1662 // Add an EOF token at the end. 1663 add(EOF, position); 1664 } 1665 1666 break; 1667 } 1668 1669 // Check for comments. Note that we don't scan for regexp and other literals here as 1670 // we may not have enough context to distinguish them from similar looking operators. 1671 // Instead we break on ambiguous operators below and let the parser decide. 1672 if (ch0 == '/' && skipComments()) { 1673 continue; 1674 } 1675 1676 if (scripting && ch0 == '#' && skipComments()) { 1677 continue; 1678 } 1679 1680 // TokenType for lookup of delimiter or operator. 
1681 TokenType type; 1682 1683 if (ch0 == '.' && convertDigit(ch1, 10) != -1) { 1684 // '.' followed by digit. 1685 // Scan and add a number. 1686 scanNumber(); 1687 } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) { 1688 if (templateExpressionOpenBraces > 0) { 1689 if (type == LBRACE) { 1690 templateExpressionOpenBraces++; 1691 } else if (type == RBRACE) { 1692 if (--templateExpressionOpenBraces == 0) { 1693 break; 1694 } 1695 } 1696 } 1697 1698 // Get the number of characters in the token. 1699 final int typeLength = type.getLength(); 1700 // Skip that many characters. 1701 skip(typeLength); 1702 // Add operator token. 1703 add(type, position - typeLength); 1704 // Some operator tokens also mark the beginning of regexp, XML, or here string literals. 1705 // We break to let the parser decide what it is. 1706 if (canStartLiteral(type)) { 1707 break; 1708 } else if (type == LBRACE && pauseOnNextLeftBrace) { 1709 pauseOnNextLeftBrace = false; 1710 break; 1711 } 1712 } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') { 1713 // Scan and add identifier or keyword. 1714 scanIdentifierOrKeyword(); 1715 } else if (isStringDelimiter(ch0)) { 1716 // Scan and add a string. 1717 scanString(true); 1718 } else if (Character.isDigit(ch0)) { 1719 // Scan and add a number. 1720 scanNumber(); 1721 } else if (isTemplateDelimiter(ch0) && es6) { 1722 // Scan and add template in ES6 mode. 1723 scanTemplate(); 1724 } else if (isTemplateDelimiter(ch0) && scripting) { 1725 // Scan and add an exec string ('`') in scripting mode. 1726 scanString(true); 1727 } else { 1728 // Don't recognize this character. 1729 skip(1); 1730 add(ERROR, position - 1); 1731 } 1732 } 1733 } 1734 1735 /** 1736 * Return value of token given its token descriptor. 1737 * 1738 * @param token Token descriptor. 1739 * @return JavaScript value. 
1740 */ 1741 Object getValueOf(final long token, final boolean strict) { 1742 final int start = Token.descPosition(token); 1743 final int len = Token.descLength(token); 1744 1745 switch (Token.descType(token)) { 1746 case DECIMAL: 1747 return Lexer.valueOf(source.getString(start, len), 10); // number 1748 case HEXADECIMAL: 1749 return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number 1750 case OCTAL_LEGACY: 1751 return Lexer.valueOf(source.getString(start, len), 8); // number 1752 case OCTAL: 1753 return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number 1754 case BINARY_NUMBER: 1755 return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number 1756 case FLOATING: 1757 final String str = source.getString(start, len); 1758 final double value = Double.valueOf(str); 1759 if (str.indexOf('.') != -1) { 1760 return value; //number 1761 } 1762 //anything without an explicit decimal point is still subject to a 1763 //"representable as int or long" check. Then the programmer does not 1764 //explicitly code something as a double. For example new Color(int, int, int) 1765 //and new Color(float, float, float) will get ambiguous for cases like 1766 //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point. 1767 //yet we don't want e.g. 
1e6 to be a double unnecessarily 1768 if (JSType.isStrictlyRepresentableAsInt(value)) { 1769 return (int)value; 1770 } 1771 return value; 1772 case STRING: 1773 return source.getString(start, len); // String 1774 case ESCSTRING: 1775 return valueOfString(start, len, strict); // String 1776 case IDENT: 1777 return valueOfIdent(start, len); // String 1778 case REGEX: 1779 return valueOfPattern(start, len); // RegexToken::LexerToken 1780 case TEMPLATE: 1781 case TEMPLATE_HEAD: 1782 case TEMPLATE_MIDDLE: 1783 case TEMPLATE_TAIL: 1784 return valueOfString(start, len, true); // String 1785 case XML: 1786 return valueOfXML(start, len); // XMLToken::LexerToken 1787 case DIRECTIVE_COMMENT: 1788 return source.getString(start, len); 1789 default: 1790 break; 1791 } 1792 1793 return null; 1794 } 1795 1796 /** 1797 * Get the raw string value of a template literal string part. 1798 * 1799 * @param token template string token 1800 * @return raw string 1801 */ 1802 public String valueOfRawString(final long token) { 1803 final int start = Token.descPosition(token); 1804 final int length = Token.descLength(token); 1805 1806 // Save the current position. 1807 final int savePosition = position; 1808 // Calculate the end position. 1809 final int end = start + length; 1810 // Reset to beginning of string. 1811 reset(start); 1812 1813 // Buffer for recording characters. 1814 final StringBuilder sb = new StringBuilder(length); 1815 1816 // Scan until end of string. 1817 while (position < end) { 1818 if (ch0 == '\r') { 1819 // Convert CR-LF or CR to LF line terminator. 1820 sb.append('\n'); 1821 skip(ch1 == '\n' ? 2 : 1); 1822 } else { 1823 // Add regular character. 1824 sb.append(ch0); 1825 skip(1); 1826 } 1827 } 1828 1829 // Restore position. 
1830 reset(savePosition); 1831 1832 return sb.toString(); 1833 } 1834 1835 /** 1836 * Get the correctly localized error message for a given message id format arguments 1837 * @param msgId message id 1838 * @param args format arguments 1839 * @return message 1840 */ 1841 protected static String message(final String msgId, final String... args) { 1842 return ECMAErrors.getMessage("lexer.error." + msgId, args); 1843 } 1844 1845 /** 1846 * Generate a runtime exception 1847 * 1848 * @param message error message 1849 * @param type token type 1850 * @param start start position of lexed error 1851 * @param length length of lexed error 1852 * @throws ParserException unconditionally 1853 */ 1854 protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException { 1855 final long token = Token.toDesc(type, start, length); 1856 final int pos = Token.descPosition(token); 1857 final int lineNum = source.getLine(pos); 1858 final int columnNum = source.getColumn(pos); 1859 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token); 1860 throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token); 1861 } 1862 1863 /** 1864 * Helper class for Lexer tokens, e.g XML or RegExp tokens. 1865 * This is the abstract superclass 1866 */ 1867 public static abstract class LexerToken implements Serializable { 1868 private static final long serialVersionUID = 1L; 1869 1870 private final String expression; 1871 1872 /** 1873 * Constructor 1874 * @param expression token expression 1875 */ 1876 protected LexerToken(final String expression) { 1877 this.expression = expression; 1878 } 1879 1880 /** 1881 * Get the expression 1882 * @return expression 1883 */ 1884 public String getExpression() { 1885 return expression; 1886 } 1887 } 1888 1889 /** 1890 * Temporary container for regular expressions. 
*/
    public static class RegexToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /** Options. */
        private final String options;

        /**
         * Constructor.
         *
         * @param expression regexp expression
         * @param options    regexp options
         */
        public RegexToken(final String expression, final String options) {
            super(expression);
            this.options = options;
        }

        /**
         * Get regexp options
         * @return options
         */
        public String getOptions() {
            return options;
        }

        /**
         * Render the token as the expression between slashes, followed by the options.
         */
        @Override
        public String toString() {
            final StringBuilder text = new StringBuilder();
            text.append('/').append(getExpression()).append('/').append(options);
            return text.toString();
        }
    }

    /**
     * Temporary container for XML expression.
     */
    public static class XMLToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /**
         * Constructor.
         *
         * @param expression XML expression
         */
        public XMLToken(final String expression) {
            super(expression);
        }
    }
}