1 /*
   2  * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.ADD;
  29 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
  30 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
  31 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
  32 import static jdk.nashorn.internal.parser.TokenType.EOF;
  33 import static jdk.nashorn.internal.parser.TokenType.EOL;
  34 import static jdk.nashorn.internal.parser.TokenType.ERROR;
  35 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  36 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
  37 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
  38 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
  39 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
  40 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
  41 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
  42 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
  43 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  44 import static jdk.nashorn.internal.parser.TokenType.REGEX;
  45 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
  46 import static jdk.nashorn.internal.parser.TokenType.STRING;
  47 import static jdk.nashorn.internal.parser.TokenType.XML;
  48 
  49 import java.io.Serializable;
  50 import jdk.nashorn.internal.runtime.ECMAErrors;
  51 import jdk.nashorn.internal.runtime.ErrorManager;
  52 import jdk.nashorn.internal.runtime.JSErrorType;
  53 import jdk.nashorn.internal.runtime.JSType;
  54 import jdk.nashorn.internal.runtime.ParserException;
  55 import jdk.nashorn.internal.runtime.Source;
  56 import jdk.nashorn.internal.runtime.options.Options;
  57 
  58 /**
  59  * Responsible for converting source content into a stream of tokens.
  60  *
  61  */
  62 @SuppressWarnings("fallthrough")
  63 public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} widened to a long. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} widened to a long. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /**
     * Value of the "nashorn.lexer.xmlliterals" boolean option. When set, a token
     * starting with '&lt;' may begin a literal even when scripting mode is off
     * (see {@link #canStartLiteral(TokenType)}).
     */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens. */
    private final TokenStream stream;

    /** True if here and edit strings are supported. */
    private final boolean scripting;

    /** True if a nested scan. (scan to completion, no EOF.) */
    private final boolean nested;

    /**
     * Pending new line number and position. Holds the value recorded for the most
     * recent EOL that has not yet been written to the stream, or -1 once it has
     * been flushed (see {@link #add(TokenType, int, int)}).
     */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** True if the lexer was asked to pause when it encounters a function body. */
    private final boolean pauseOnFunctionBody;
    /** Set once a {@code function} keyword has been lexed while {@link #pauseOnFunctionBody} is on. */
    private boolean pauseOnNextLeftBrace;
  92 
    /** Horizontal blanks that are plain ASCII. */
    private static final String SPACETAB = " \t";  // ASCII space and tab
    /** ASCII line terminators. */
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    /** Every character treated as a JavaScript end of line (see {@link #isJSEOL(char)}). */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /**
     * Every character treated as JavaScript whitespace, line terminators included
     * (see {@link #isJSWhitespace(char)}).
     */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // tabulation line
        "\u000c" + // ff (ctrl-l)
        "\u00a0" + // Latin-1 space
        "\u1680" + // Ogham space mark
        "\u180e" + // separator, Mongolian vowel
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;

    /**
     * The same whitespace characters spelled as textual escape sequences (the
     * backslashes are doubled, so the string contains the escapes themselves
     * rather than the characters). Returned by {@link #getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // tabulation line
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // separator, Mongolian vowel
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
 154 
 155     static String unicodeEscape(final char ch) {
 156         final StringBuilder sb = new StringBuilder();
 157 
 158         sb.append("\\u");
 159 
 160         final String hex = Integer.toHexString(ch);
 161         for (int i = hex.length(); i < 4; i++) {
 162             sb.append('0');
 163         }
 164         sb.append(hex);
 165 
 166         return sb.toString();
 167     }
 168 
    /**
     * Constructor. Creates a lexer over the whole source with scripting mode
     * switched off.
     *
     * @param source    the source
     * @param stream    the token stream to lex
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false);
    }
 178 
    /**
     * Constructor. Creates a lexer over the whole source (from position 0 for
     * {@code source.getLength()} characters) that does not pause on function
     * bodies.
     *
     * @param source    the source
     * @param stream    the token stream to lex
     * @param scripting are we in scripting mode
     */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting) {
        this(source, 0, source.getLength(), stream, scripting, false);
    }
 189 
 190     /**
 191      * Constructor
 192      *
 193      * @param source    the source
 194      * @param start     start position in source from which to start lexing
 195      * @param len       length of source segment to lex
 196      * @param stream    token stream to lex
 197      * @param scripting are we in scripting mode
 198      * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
 199      * function body. This is used with the feature where the parser is skipping nested function bodies to
 200      * avoid reading ahead unnecessarily when we skip the function bodies.
 201      */
 202 
 203     public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean pauseOnFunctionBody) {
 204         super(source.getContent(), 1, start, len);
 205         this.source      = source;
 206         this.stream      = stream;
 207         this.scripting   = scripting;
 208         this.nested      = false;
 209         this.pendingLine = 1;
 210         this.last        = EOL;
 211 
 212         this.pauseOnFunctionBody = pauseOnFunctionBody;
 213     }
 214 
 215     private Lexer(final Lexer lexer, final State state) {
 216         super(lexer, state);
 217 
 218         source = lexer.source;
 219         stream = lexer.stream;
 220         scripting = lexer.scripting;
 221         nested = true;
 222 
 223         pendingLine = state.pendingLine;
 224         linePosition = state.linePosition;
 225         last = EOL;
 226         pauseOnFunctionBody = false;
 227     }
 228 
 229     static class State extends Scanner.State {
 230         /** Pending new line number and position. */
 231         public final int pendingLine;
 232 
 233         /** Position of last EOL + 1. */
 234         public final int linePosition;
 235 
 236         /** Type of last token added. */
 237         public final TokenType last;
 238 
 239         /*
 240          * Constructor.
 241          */
 242 
 243         State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
 244             super(position, limit, line);
 245 
 246             this.pendingLine = pendingLine;
 247             this.linePosition = linePosition;
 248             this.last = last;
 249         }
 250     }
 251 
 252     /**
 253      * Save the state of the scan.
 254      *
 255      * @return Captured state.
 256      */
 257     @Override
 258     State saveState() {
 259         return new State(position, limit, line, pendingLine, linePosition, last);
 260     }
 261 
 262     /**
 263      * Restore the state of the scan.
 264      *
 265      * @param state
 266      *            Captured state.
 267      */
 268     void restoreState(final State state) {
 269         super.restoreState(state);
 270 
 271         pendingLine = state.pendingLine;
 272         linePosition = state.linePosition;
 273         last = state.last;
 274     }
 275 
 276     /**
 277      * Add a new token to the stream.
 278      *
 279      * @param type
 280      *            Token type.
 281      * @param start
 282      *            Start position.
 283      * @param end
 284      *            End position.
 285      */
 286     protected void add(final TokenType type, final int start, final int end) {
 287         // Record last token.
 288         last = type;
 289 
 290         // Only emit the last EOL in a cluster.
 291         if (type == EOL) {
 292             pendingLine = end;
 293             linePosition = start;
 294         } else {
 295             // Write any pending EOL to stream.
 296             if (pendingLine != -1) {
 297                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
 298                 pendingLine = -1;
 299             }
 300 
 301             // Write token to stream.
 302             stream.put(Token.toDesc(type, start, end - start));
 303         }
 304     }
 305 
    /**
     * Add a new token to the stream, ending at the current scan position.
     *
     * @param type
     *            Token type.
     * @param start
     *            Start position.
     */
    protected void add(final TokenType type, final int start) {
        add(type, start, position);
    }
 317 
    /**
     * Return the String of valid whitespace characters for regular
     * expressions in JavaScript. The characters are given as textual escape
     * sequences, not as the characters themselves.
     * @return regexp whitespace string
     */
    public static String getWhitespaceRegExp() {
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
 326 
 327     /**
 328      * Skip end of line.
 329      *
 330      * @param addEOL true if EOL token should be recorded.
 331      */
 332     private void skipEOL(final boolean addEOL) {
 333 
 334         if (ch0 == '\r') { // detect \r\n pattern
 335             skip(1);
 336             if (ch0 == '\n') {
 337                 skip(1);
 338             }
 339         } else { // all other space, ch0 is guaranteed to be EOL or \0
 340             skip(1);
 341         }
 342 
 343         // bump up line count
 344         line++;
 345 
 346         if (addEOL) {
 347             // Add an EOL token.
 348             add(EOL, position, line);
 349         }
 350     }
 351 
 352     /**
 353      * Skip over rest of line including end of line.
 354      *
 355      * @param addEOL true if EOL token should be recorded.
 356      */
 357     private void skipLine(final boolean addEOL) {
 358         // Ignore characters.
 359         while (!isEOL(ch0) && !atEOF()) {
 360             skip(1);
 361         }
 362         // Skip over end of line.
 363         skipEOL(addEOL);
 364     }
 365 
 366     /**
 367      * Test whether a char is valid JavaScript whitespace
 368      * @param ch a char
 369      * @return true if valid JavaScript whitespace
 370      */
 371     public static boolean isJSWhitespace(final char ch) {
 372         return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
 373     }
 374 
 375     /**
 376      * Test whether a char is valid JavaScript end of line
 377      * @param ch a char
 378      * @return true if valid JavaScript end of line
 379      */
 380     public static boolean isJSEOL(final char ch) {
 381         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
 382     }
 383 
 384     /**
 385      * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
 386      * strings ('`') in scripting mode.
 387      * @param ch a char
 388      * @return true if string delimiter
 389      */
 390     protected boolean isStringDelimiter(final char ch) {
 391         return ch == '\'' || ch == '"' || (scripting && ch == '`');
 392     }
 393 
 394     /**
 395      * Test whether a char is valid JavaScript whitespace
 396      * @param ch a char
 397      * @return true if valid JavaScript whitespace
 398      */
 399     protected boolean isWhitespace(final char ch) {
 400         return Lexer.isJSWhitespace(ch);
 401     }
 402 
 403     /**
 404      * Test whether a char is valid JavaScript end of line
 405      * @param ch a char
 406      * @return true if valid JavaScript end of line
 407      */
 408     protected boolean isEOL(final char ch) {
 409         return Lexer.isJSEOL(ch);
 410     }
 411 
 412     /**
 413      * Skip over whitespace and detect end of line, adding EOL tokens if
 414      * encountered.
 415      *
 416      * @param addEOL true if EOL tokens should be recorded.
 417      */
 418     private void skipWhitespace(final boolean addEOL) {
 419         while (isWhitespace(ch0)) {
 420             if (isEOL(ch0)) {
 421                 skipEOL(addEOL);
 422             } else {
 423                 skip(1);
 424             }
 425         }
 426     }
 427 
 428     /**
 429      * Skip over comments.
 430      *
 431      * @return True if a comment.
 432      */
 433     protected boolean skipComments() {
 434         // Save the current position.
 435         final int start = position;
 436 
 437         if (ch0 == '/') {
 438             // Is it a // comment.
 439             if (ch1 == '/') {
 440                 // Skip over //.
 441                 skip(2);
 442 
 443                 boolean directiveComment = false;
 444                 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
 445                     directiveComment = true;
 446                 }
 447 
 448                 // Scan for EOL.
 449                 while (!atEOF() && !isEOL(ch0)) {
 450                     skip(1);
 451                 }
 452                 // Did detect a comment.
 453                 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
 454                 return true;
 455             } else if (ch1 == '*') {
 456                 // Skip over /*.
 457                 skip(2);
 458                 // Scan for */.
 459                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
 460                     // If end of line handle else skip character.
 461                     if (isEOL(ch0)) {
 462                         skipEOL(true);
 463                     } else {
 464                         skip(1);
 465                     }
 466                 }
 467 
 468                 if (atEOF()) {
 469                     // TODO - Report closing */ missing in parser.
 470                     add(ERROR, start);
 471                 } else {
 472                     // Skip */.
 473                     skip(2);
 474                 }
 475 
 476                 // Did detect a comment.
 477                 add(COMMENT, start);
 478                 return true;
 479             }
 480         } else if (ch0 == '#') {
 481             assert scripting;
 482             // shell style comment
 483             // Skip over #.
 484             skip(1);
 485             // Scan for EOL.
 486             while (!atEOF() && !isEOL(ch0)) {
 487                 skip(1);
 488             }
 489             // Did detect a comment.
 490             add(COMMENT, start);
 491             return true;
 492         }
 493 
 494         // Not a comment.
 495         return false;
 496     }
 497 
 498     /**
 499      * Convert a regex token to a token object.
 500      *
 501      * @param start  Position in source content.
 502      * @param length Length of regex token.
 503      * @return Regex token object.
 504      */
 505     public RegexToken valueOfPattern(final int start, final int length) {
 506         // Save the current position.
 507         final int savePosition = position;
 508         // Reset to beginning of content.
 509         reset(start);
 510         // Buffer for recording characters.
 511         final StringBuilder sb = new StringBuilder(length);
 512 
 513         // Skip /.
 514         skip(1);
 515         boolean inBrackets = false;
 516         // Scan for closing /, stopping at end of line.
 517         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
 518             // Skip over escaped character.
 519             if (ch0 == '\\') {
 520                 sb.append(ch0);
 521                 sb.append(ch1);
 522                 skip(2);
 523             } else {
 524                 if (ch0 == '[') {
 525                     inBrackets = true;
 526                 } else if (ch0 == ']') {
 527                     inBrackets = false;
 528                 }
 529 
 530                 // Skip literal character.
 531                 sb.append(ch0);
 532                 skip(1);
 533             }
 534         }
 535 
 536         // Get pattern as string.
 537         final String regex = sb.toString();
 538 
 539         // Skip /.
 540         skip(1);
 541 
 542         // Options as string.
 543         final String options = source.getString(position, scanIdentifier());
 544 
 545         reset(savePosition);
 546 
 547         // Compile the pattern.
 548         return new RegexToken(regex, options);
 549     }
 550 
 551     /**
 552      * Return true if the given token can be the beginning of a literal.
 553      *
 554      * @param token a token
 555      * @return true if token can start a literal.
 556      */
 557     public boolean canStartLiteral(final TokenType token) {
 558         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
 559     }
 560 
 561     /**
 562      * interface to receive line information for multi-line literals.
 563      */
 564     protected interface LineInfoReceiver {
 565         /**
 566          * Receives line information
 567          * @param line last line number
 568          * @param linePosition position of last line
 569          */
 570         public void lineInfo(int line, int linePosition);
 571     }
 572 
 573     /**
 574      * Check whether the given token represents the beginning of a literal. If so scan
 575      * the literal and return <tt>true</tt>, otherwise return false.
 576      *
 577      * @param token the token.
 578      * @param startTokenType the token type.
 579      * @param lir LineInfoReceiver that receives line info for multi-line string literals.
 580      * @return True if a literal beginning with startToken was found and scanned.
 581      */
 582     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
 583         // Check if it can be a literal.
 584         if (!canStartLiteral(startTokenType)) {
 585             return false;
 586         }
 587         // We break on ambiguous tokens so if we already moved on it can't be a literal.
 588         if (stream.get(stream.last()) != token) {
 589             return false;
 590         }
 591         // Rewind to token start position
 592         reset(Token.descPosition(token));
 593 
 594         if (ch0 == '/') {
 595             return scanRegEx();
 596         } else if (ch0 == '<') {
 597             if (ch1 == '<') {
 598                 return scanHereString(lir);
 599             } else if (Character.isJavaIdentifierStart(ch1)) {
 600                 return scanXMLLiteral();
 601             }
 602         }
 603 
 604         return false;
 605     }
 606 
 607     /**
 608      * Scan over regex literal.
 609      *
 610      * @return True if a regex literal.
 611      */
 612     private boolean scanRegEx() {
 613         assert ch0 == '/';
 614         // Make sure it's not a comment.
 615         if (ch1 != '/' && ch1 != '*') {
 616             // Record beginning of literal.
 617             final int start = position;
 618             // Skip /.
 619             skip(1);
 620             boolean inBrackets = false;
 621 
 622             // Scan for closing /, stopping at end of line.
 623             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
 624                 // Skip over escaped character.
 625                 if (ch0 == '\\') {
 626                     skip(1);
 627                     if (isEOL(ch0)) {
 628                         reset(start);
 629                         return false;
 630                     }
 631                     skip(1);
 632                 } else {
 633                     if (ch0 == '[') {
 634                         inBrackets = true;
 635                     } else if (ch0 == ']') {
 636                         inBrackets = false;
 637                     }
 638 
 639                     // Skip literal character.
 640                     skip(1);
 641                 }
 642             }
 643 
 644             // If regex literal.
 645             if (ch0 == '/') {
 646                 // Skip /.
 647                 skip(1);
 648 
 649                 // Skip over options.
 650                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
 651                     skip(1);
 652                 }
 653 
 654                 // Add regex token.
 655                 add(REGEX, start);
 656                 // Regex literal detected.
 657                 return true;
 658             }
 659 
 660             // False start try again.
 661             reset(start);
 662         }
 663 
 664         // Regex literal not detected.
 665         return false;
 666     }
 667 
 668     /**
 669      * Convert a digit to a integer.  Can't use Character.digit since we are
 670      * restricted to ASCII by the spec.
 671      *
 672      * @param ch   Character to convert.
 673      * @param base Numeric base.
 674      *
 675      * @return The converted digit or -1 if invalid.
 676      */
 677     protected static int convertDigit(final char ch, final int base) {
 678         int digit;
 679 
 680         if ('0' <= ch && ch <= '9') {
 681             digit = ch - '0';
 682         } else if ('A' <= ch && ch <= 'Z') {
 683             digit = ch - 'A' + 10;
 684         } else if ('a' <= ch && ch <= 'z') {
 685             digit = ch - 'a' + 10;
 686         } else {
 687             return -1;
 688         }
 689 
 690         return digit < base ? digit : -1;
 691     }
 692 
 693 
 694     /**
 695      * Get the value of a hexadecimal numeric sequence.
 696      *
 697      * @param length Number of digits.
 698      * @param type   Type of token to report against.
 699      * @return Value of sequence or < 0 if no digits.
 700      */
 701     private int hexSequence(final int length, final TokenType type) {
 702         int value = 0;
 703 
 704         for (int i = 0; i < length; i++) {
 705             final int digit = convertDigit(ch0, 16);
 706 
 707             if (digit == -1) {
 708                 error(Lexer.message("invalid.hex"), type, position, limit);
 709                 return i == 0 ? -1 : value;
 710             }
 711 
 712             value = digit | value << 4;
 713             skip(1);
 714         }
 715 
 716         return value;
 717     }
 718 
 719     /**
 720      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
 721      *
 722      * @return Value of sequence.
 723      */
 724     private int octalSequence() {
 725         int value = 0;
 726 
 727         for (int i = 0; i < 3; i++) {
 728             final int digit = convertDigit(ch0, 8);
 729 
 730             if (digit == -1) {
 731                 break;
 732             }
 733             value = digit | value << 3;
 734             skip(1);
 735 
 736             if (i == 1 && value >= 32) {
 737                 break;
 738             }
 739         }
 740         return value;
 741     }
 742 
 743     /**
 744      * Convert a string to a JavaScript identifier.
 745      *
 746      * @param start  Position in source content.
 747      * @param length Length of token.
 748      * @return Ident string or null if an error.
 749      */
 750     private String valueOfIdent(final int start, final int length) throws RuntimeException {
 751         // Save the current position.
 752         final int savePosition = position;
 753         // End of scan.
 754         final int end = start + length;
 755         // Reset to beginning of content.
 756         reset(start);
 757         // Buffer for recording characters.
 758         final StringBuilder sb = new StringBuilder(length);
 759 
 760         // Scan until end of line or end of file.
 761         while (!atEOF() && position < end && !isEOL(ch0)) {
 762             // If escape character.
 763             if (ch0 == '\\' && ch1 == 'u') {
 764                 skip(2);
 765                 final int ch = hexSequence(4, TokenType.IDENT);
 766                 if (isWhitespace((char)ch)) {
 767                     return null;
 768                 }
 769                 if (ch < 0) {
 770                     sb.append('\\');
 771                     sb.append('u');
 772                 } else {
 773                     sb.append((char)ch);
 774                 }
 775             } else {
 776                 // Add regular character.
 777                 sb.append(ch0);
 778                 skip(1);
 779             }
 780         }
 781 
 782         // Restore position.
 783         reset(savePosition);
 784 
 785         return sb.toString();
 786     }
 787 
 788     /**
 789      * Scan over and identifier or keyword. Handles identifiers containing
 790      * encoded Unicode chars.
 791      *
 792      * Example:
 793      *
 794      * var \u0042 = 44;
 795      */
 796     private void scanIdentifierOrKeyword() {
 797         // Record beginning of identifier.
 798         final int start = position;
 799         // Scan identifier.
 800         final int length = scanIdentifier();
 801         // Check to see if it is a keyword.
 802         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
 803         if (type == FUNCTION && pauseOnFunctionBody) {
 804             pauseOnNextLeftBrace = true;
 805         }
 806         // Add keyword or identifier token.
 807         add(type, start);
 808     }
 809 
 810     /**
 811      * Convert a string to a JavaScript string object.
 812      *
 813      * @param start  Position in source content.
 814      * @param length Length of token.
 815      * @return JavaScript string object.
 816      */
 817     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
 818         // Save the current position.
 819         final int savePosition = position;
 820         // Calculate the end position.
 821         final int end = start + length;
 822         // Reset to beginning of string.
 823         reset(start);
 824 
 825         // Buffer for recording characters.
 826         final StringBuilder sb = new StringBuilder(length);
 827 
 828         // Scan until end of string.
 829         while (position < end) {
 830             // If escape character.
 831             if (ch0 == '\\') {
 832                 skip(1);
 833 
 834                 final char next = ch0;
 835                 final int afterSlash = position;
 836 
 837                 skip(1);
 838 
 839                 // Special characters.
 840                 switch (next) {
 841                 case '0':
 842                 case '1':
 843                 case '2':
 844                 case '3':
 845                 case '4':
 846                 case '5':
 847                 case '6':
 848                 case '7': {
 849                     if (strict) {
 850                         // "\0" itself is allowed in strict mode. Only other 'real'
 851                         // octal escape sequences are not allowed (eg. "\02", "\31").
 852                         // See section 7.8.4 String literals production EscapeSequence
 853                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
 854                             error(Lexer.message("strict.no.octal"), STRING, position, limit);
 855                         }
 856                     }
 857                     reset(afterSlash);
 858                     // Octal sequence.
 859                     final int ch = octalSequence();
 860 
 861                     if (ch < 0) {
 862                         sb.append('\\');
 863                         sb.append('x');
 864                     } else {
 865                         sb.append((char)ch);
 866                     }
 867                     break;
 868                 }
 869                 case 'n':
 870                     sb.append('\n');
 871                     break;
 872                 case 't':
 873                     sb.append('\t');
 874                     break;
 875                 case 'b':
 876                     sb.append('\b');
 877                     break;
 878                 case 'f':
 879                     sb.append('\f');
 880                     break;
 881                 case 'r':
 882                     sb.append('\r');
 883                     break;
 884                 case '\'':
 885                     sb.append('\'');
 886                     break;
 887                 case '\"':
 888                     sb.append('\"');
 889                     break;
 890                 case '\\':
 891                     sb.append('\\');
 892                     break;
 893                 case '\r': // CR | CRLF
 894                     if (ch0 == '\n') {
 895                         skip(1);
 896                     }
 897                     // fall through
 898                 case '\n': // LF
 899                 case '\u2028': // LS
 900                 case '\u2029': // PS
 901                     // continue on the next line, slash-return continues string
 902                     // literal
 903                     break;
 904                 case 'x': {
 905                     // Hex sequence.
 906                     final int ch = hexSequence(2, STRING);
 907 
 908                     if (ch < 0) {
 909                         sb.append('\\');
 910                         sb.append('x');
 911                     } else {
 912                         sb.append((char)ch);
 913                     }
 914                 }
 915                     break;
 916                 case 'u': {
 917                     // Unicode sequence.
 918                     final int ch = hexSequence(4, STRING);
 919 
 920                     if (ch < 0) {
 921                         sb.append('\\');
 922                         sb.append('u');
 923                     } else {
 924                         sb.append((char)ch);
 925                     }
 926                 }
 927                     break;
 928                 case 'v':
 929                     sb.append('\u000B');
 930                     break;
 931                 // All other characters.
 932                 default:
 933                     sb.append(next);
 934                     break;
 935                 }
 936             } else {
 937                 // Add regular character.
 938                 sb.append(ch0);
 939                 skip(1);
 940             }
 941         }
 942 
 943         // Restore position.
 944         reset(savePosition);
 945 
 946         return sb.toString();
 947     }
 948 
 949     /**
 950      * Scan over a string literal.
 951      * @param add true if we nare not just scanning but should actually modify the token stream
 952      */
 953     protected void scanString(final boolean add) {
 954         // Type of string.
 955         TokenType type = STRING;
 956         // Record starting quote.
 957         final char quote = ch0;
 958         // Skip over quote.
 959         skip(1);
 960 
 961         // Record beginning of string content.
 962         final State stringState = saveState();
 963 
 964         // Scan until close quote or end of line.
 965         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
 966             // Skip over escaped character.
 967             if (ch0 == '\\') {
 968                 type = ESCSTRING;
 969                 skip(1);
 970                 if (! isEscapeCharacter(ch0)) {
 971                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);
 972                 }
 973                 if (isEOL(ch0)) {
 974                     // Multiline string literal
 975                     skipEOL(false);
 976                     continue;
 977                 }
 978             }
 979             // Skip literal character.
 980             skip(1);
 981         }
 982 
 983         // If close quote.
 984         if (ch0 == quote) {
 985             // Skip close quote.
 986             skip(1);
 987         } else {
 988             error(Lexer.message("missing.close.quote"), STRING, position, limit);
 989         }
 990 
 991         // If not just scanning.
 992         if (add) {
 993             // Record end of string.
 994             stringState.setLimit(position - 1);
 995 
 996             if (scripting && !stringState.isEmpty()) {
 997                 switch (quote) {
 998                 case '`':
 999                     // Mark the beginning of an exec string.
1000                     add(EXECSTRING, stringState.position, stringState.limit);
1001                     // Frame edit string with left brace.
1002                     add(LBRACE, stringState.position, stringState.position);
1003                     // Process edit string.
1004                     editString(type, stringState);
1005                     // Frame edit string with right brace.
1006                     add(RBRACE, stringState.limit, stringState.limit);
1007                     break;
1008                 case '"':
1009                     // Only edit double quoted strings.
1010                     editString(type, stringState);
1011                     break;
1012                 case '\'':
1013                     // Add string token without editing.
1014                     add(type, stringState.position, stringState.limit);
1015                     break;
1016                 default:
1017                     break;
1018                 }
1019             } else {
1020                 /// Add string token without editing.
1021                 add(type, stringState.position, stringState.limit);
1022             }
1023         }
1024     }
1025 
    /**
     * Is the given character a valid escape char after "\" ?
     *
     * <p>This implementation accepts every character. {@link #scanString(boolean)}
     * reports an "invalid.escape.char" error whenever this method returns false.
     *
     * @param ch character to be checked
     * @return true if the given character is valid after "\"; always true in
     *         this implementation
     */
    protected boolean isEscapeCharacter(final char ch) {
        return true;
    }
1035 
1036     /**
1037      * Convert string to number.
1038      *
1039      * @param valueString  String to convert.
1040      * @param radix        Numeric base.
1041      * @return Converted number.
1042      */
1043     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1044         try {
1045             return Integer.parseInt(valueString, radix);
1046         } catch (final NumberFormatException e) {
1047             if (radix == 10) {
1048                 return Double.valueOf(valueString);
1049             }
1050 
1051             double value = 0.0;
1052 
1053             for (int i = 0; i < valueString.length(); i++) {
1054                 final char ch = valueString.charAt(i);
1055                 // Preverified, should always be a valid digit.
1056                 final int digit = convertDigit(ch, radix);
1057                 value *= radix;
1058                 value += digit;
1059             }
1060 
1061             return value;
1062         }
1063     }
1064 
    /**
     * Scan a number and add the matching token to the token stream.
     *
     * <p>Starting at the current position the literal is classified as:
     * <ul>
     * <li>{@code HEXADECIMAL} - {@code 0x} or {@code 0X} followed by at least one hex digit;</li>
     * <li>{@code OCTAL} - a leading zero followed by one or more digits, all below 8;</li>
     * <li>{@code FLOATING} - digits followed by a fraction and/or an exponent;</li>
     * <li>{@code DECIMAL} - anything else.</li>
     * </ul>
     * A "missing.space.after.number" error is reported when the character right
     * after the literal could start an identifier.
     */
    protected void scanNumber() {
        // Record beginning of number.
        final int start = position;
        // Assume value is a decimal.
        TokenType type = DECIMAL;

        // First digit of number (-1 when the literal starts with '.').
        int digit = convertDigit(ch0, 10);

        // If number begins with 0x.
        if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
            // Skip over 0xN.
            skip(3);
            // Skip over remaining digits.
            while (convertDigit(ch0, 16) != -1) {
                skip(1);
            }

            type = HEXADECIMAL;
        } else {
            // Check for possible octal constant: only a leading zero qualifies.
            boolean octal = digit == 0;
            // Skip first digit if not leading '.'.
            if (digit != -1) {
                skip(1);
            }

            // Skip remaining digits.
            while ((digit = convertDigit(ch0, 10)) != -1) {
                // Check octal only digits.
                octal = octal && digit < 8;
                // Skip digit.
                skip(1);
            }

            // A lone "0" is not octal; at least one digit must follow the zero.
            if (octal && position - start > 1) {
                type = OCTAL;
            } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
                // Must be a double.
                if (ch0 == '.') {
                    // Skip period.
                    skip(1);
                    // Skip the fraction digits.
                    while (convertDigit(ch0, 10) != -1) {
                        skip(1);
                    }
                }

                // Detect exponent.
                if (ch0 == 'E' || ch0 == 'e') {
                    // Skip E.
                    skip(1);
                    // Detect and skip exponent sign.
                    if (ch0 == '+' || ch0 == '-') {
                        skip(1);
                    }
                    // Skip exponent digits (the loop also accepts none at all).
                    while (convertDigit(ch0, 10) != -1) {
                        skip(1);
                    }
                }

                type = FLOATING;
            }
        }

        // A literal may not run straight into an identifier, e.g. "3in".
        if (Character.isJavaIdentifierStart(ch0)) {
            error(Lexer.message("missing.space.after.number"), type, position, 1);
        }

        // Add number token.
        add(type, start);
    }
1141 
    /**
     * Convert an XML token to a token object.
     *
     * @param start  Position in source content.
     * @param length Length of XML token.
     * @return XML token object wrapping the source text of the token.
     */
    XMLToken valueOfXML(final int start, final int length) {
        return new XMLToken(source.getString(start, length));
    }
1152 
    /**
     * Scan over a XML token.
     *
     * <p>Only active when {@code XML_LITERALS} is set. Tags are tracked with a
     * nesting counter; scanning stops once every opened element has been
     * closed, and the whole span is added as a single {@code XML} token. When
     * the text turns out not to be an XML literal the position is reset to
     * where scanning started and nothing is added.
     *
     * @return TRUE if is an XML literal.
     */
    private boolean scanXMLLiteral() {
        assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
        if (XML_LITERALS) {
            // Record beginning of xml expression.
            final int start = position;

            // Number of elements currently open.
            int openCount = 0;

            do {
                if (ch0 == '<') {
                    if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
                        // Closing tag: "</name".
                        skip(3);
                        openCount--;
                    } else if (Character.isJavaIdentifierStart(ch1)) {
                        // Opening tag: "<name".
                        skip(2);
                        openCount++;
                    } else if (ch1 == '?') {
                        // "<?" construct; does not affect nesting.
                        skip(2);
                    } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
                        // "<!--" construct; does not affect nesting.
                        skip(4);
                    } else {
                        // Not something that can appear in an XML literal.
                        reset(start);
                        return false;
                    }

                    // Scan the remainder of the tag up to its '>'.
                    while (!atEOF() && ch0 != '>') {
                        if (ch0 == '/' && ch1 == '>') {
                            // Self closing tag "/>": undo the open count and stop on the '>'.
                            openCount--;
                            skip(1);
                            break;
                        } else if (ch0 == '\"' || ch0 == '\'') {
                            // Quoted attribute value: scan it without adding tokens.
                            scanString(false);
                        } else {
                            skip(1);
                        }
                    }

                    // Ran into EOF before the tag was terminated.
                    if (ch0 != '>') {
                        reset(start);
                        return false;
                    }

                    // Skip the '>'.
                    skip(1);
                } else if (atEOF()) {
                    // Content ended while elements were still open.
                    reset(start);
                    return false;
                } else {
                    // Character data between tags.
                    skip(1);
                }
            } while (openCount > 0);

            add(XML, start);
            return true;
        }

        return false;
    }
1215 
1216     /**
1217      * Scan over identifier characters.
1218      *
1219      * @return Length of identifier or zero if none found.
1220      */
1221     private int scanIdentifier() {
1222         final int start = position;
1223 
1224         // Make sure first character is valid start character.
1225         if (ch0 == '\\' && ch1 == 'u') {
1226             skip(2);
1227             final int ch = hexSequence(4, TokenType.IDENT);
1228 
1229             if (!Character.isJavaIdentifierStart(ch)) {
1230                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1231             }
1232         } else if (!Character.isJavaIdentifierStart(ch0)) {
1233             // Not an identifier.
1234             return 0;
1235         }
1236 
1237         // Make sure remaining characters are valid part characters.
1238         while (!atEOF()) {
1239             if (ch0 == '\\' && ch1 == 'u') {
1240                 skip(2);
1241                 final int ch = hexSequence(4, TokenType.IDENT);
1242 
1243                 if (!Character.isJavaIdentifierPart(ch)) {
1244                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1245                 }
1246             } else if (Character.isJavaIdentifierPart(ch0)) {
1247                 skip(1);
1248             } else {
1249                 break;
1250             }
1251         }
1252 
1253         // Length of identifier sequence.
1254         return position - start;
1255     }
1256 
1257     /**
1258      * Compare two identifiers (in content) for equality.
1259      *
1260      * @param aStart  Start of first identifier.
1261      * @param aLength Length of first identifier.
1262      * @param bStart  Start of second identifier.
1263      * @param bLength Length of second identifier.
1264      * @return True if equal.
1265      */
1266     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1267         if (aLength == bLength) {
1268             for (int i = 0; i < aLength; i++) {
1269                 if (content[aStart + i] != content[bStart + i]) {
1270                     return false;
1271                 }
1272             }
1273 
1274             return true;
1275         }
1276 
1277         return false;
1278     }
1279 
1280     /**
1281      * Detect if a line starts with a marker identifier.
1282      *
1283      * @param identStart  Start of identifier.
1284      * @param identLength Length of identifier.
1285      * @return True if detected.
1286      */
1287     private boolean hasHereMarker(final int identStart, final int identLength) {
1288         // Skip any whitespace.
1289         skipWhitespace(false);
1290 
1291         return identifierEqual(identStart, identLength, position, scanIdentifier());
1292     }
1293 
1294     /**
1295      * Lexer to service edit strings.
1296      */
1297     private static class EditStringLexer extends Lexer {
1298         /** Type of string literals to emit. */
1299         final TokenType stringType;
1300 
1301         /*
1302          * Constructor.
1303          */
1304 
1305         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1306             super(lexer, stringState);
1307 
1308             this.stringType = stringType;
1309         }
1310 
1311         /**
1312          * Lexify the contents of the string.
1313          */
1314         @Override
1315         public void lexify() {
1316             // Record start of string position.
1317             int stringStart = position;
1318             // Indicate that the priming first string has not been emitted.
1319             boolean primed = false;
1320 
1321             while (true) {
1322                 // Detect end of content.
1323                 if (atEOF()) {
1324                     break;
1325                 }
1326 
1327                 // Honour escapes (should be well formed.)
1328                 if (ch0 == '\\' && stringType == ESCSTRING) {
1329                     skip(2);
1330 
1331                     continue;
1332                 }
1333 
1334                 // If start of expression.
1335                 if (ch0 == '$' && ch1 == '{') {
1336                     if (!primed || stringStart != position) {
1337                         if (primed) {
1338                             add(ADD, stringStart, stringStart + 1);
1339                         }
1340 
1341                         add(stringType, stringStart, position);
1342                         primed = true;
1343                     }
1344 
1345                     // Skip ${
1346                     skip(2);
1347 
1348                     // Save expression state.
1349                     final State expressionState = saveState();
1350 
1351                     // Start with one open brace.
1352                     int braceCount = 1;
1353 
1354                     // Scan for the rest of the string.
1355                     while (!atEOF()) {
1356                         // If closing brace.
1357                         if (ch0 == '}') {
1358                             // Break only only if matching brace.
1359                             if (--braceCount == 0) {
1360                                 break;
1361                             }
1362                         } else if (ch0 == '{') {
1363                             // Bump up the brace count.
1364                             braceCount++;
1365                         }
1366 
1367                         // Skip to next character.
1368                         skip(1);
1369                     }
1370 
1371                     // If braces don't match then report an error.
1372                     if (braceCount != 0) {
1373                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1374                     }
1375 
1376                     // Mark end of expression.
1377                     expressionState.setLimit(position);
1378                     // Skip closing brace.
1379                     skip(1);
1380 
1381                     // Start next string.
1382                     stringStart = position;
1383 
1384                     // Concatenate expression.
1385                     add(ADD, expressionState.position, expressionState.position + 1);
1386                     add(LPAREN, expressionState.position, expressionState.position + 1);
1387 
1388                     // Scan expression.
1389                     final Lexer lexer = new Lexer(this, expressionState);
1390                     lexer.lexify();
1391 
1392                     // Close out expression parenthesis.
1393                     add(RPAREN, position - 1, position);
1394 
1395                     continue;
1396                 }
1397 
1398                 // Next character in string.
1399                 skip(1);
1400             }
1401 
1402             // If there is any unemitted string portion.
1403             if (stringStart != limit) {
1404                 // Concatenate remaining string.
1405                 if (primed) {
1406                     add(ADD, stringStart, 1);
1407                 }
1408 
1409                 add(stringType, stringStart, limit);
1410             }
1411         }
1412 
1413     }
1414 
1415     /**
1416      * Edit string for nested expressions.
1417      *
1418      * @param stringType  Type of string literals to emit.
1419      * @param stringState State of lexer at start of string.
1420      */
1421     private void editString(final TokenType stringType, final State stringState) {
1422         // Use special lexer to scan string.
1423         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1424         lexer.lexify();
1425 
1426         // Need to keep lexer informed.
1427         last = stringType;
1428     }
1429 
    /**
     * Scan over a here string.
     *
     * <p>Only active in scripting mode. The expected form is {@code <<} or
     * {@code <<<} followed by a marker identifier, which may be wrapped in
     * single or double quotes to suppress string editing. The string body
     * starts on the following line and runs up to a line that starts (after
     * optional whitespace) with the marker. The remainder of the line that
     * introduced the here string is lexed after the string token has been added.
     *
     * @param lir receiver that is told the line number and position of the
     *            last line scanned while hunting for the end marker
     * @return TRUE if is a here string.
     */
    private boolean scanHereString(final LineInfoReceiver lir) {
        assert ch0 == '<' && ch1 == '<';
        if (scripting) {
            // Record beginning of here string.
            final State saved = saveState();

            // << or <<<
            // With "<<" the final end of line is dropped from the string; with "<<<" it is kept.
            final boolean excludeLastEOL = ch2 != '<';

            if (excludeLastEOL) {
                skip(2);
            } else {
                skip(3);
            }

            // Scan identifier. It might be quoted, indicating that no string editing should take place.
            final char quoteChar = ch0;
            final boolean noStringEditing = quoteChar == '"' || quoteChar == '\'';
            if (noStringEditing) {
                skip(1);
            }
            final int identStart = position;
            final int identLength = scanIdentifier();
            if (noStringEditing) {
                // A quoted marker must be closed by the same quote character.
                if (ch0 != quoteChar) {
                    error(Lexer.message("here.non.matching.delimiter"), last, position, position);
                    // Only reached if error() returns; its Javadoc says it always throws.
                    restoreState(saved);
                    return false;
                }
                skip(1);
            }

            // Check for identifier.
            if (identLength == 0) {
                // Treat as shift.
                restoreState(saved);

                return false;
            }

            // Record rest of line.
            final State restState = saveState();
            // keep line number updated
            int lastLine = line;

            skipLine(false);
            lastLine++;
            int lastLinePosition = position;
            restState.setLimit(position);

            // Record beginning of string.
            final State stringState = saveState();
            int stringEnd = position;

            // Hunt down marker.
            while (!atEOF()) {
                // Skip any whitespace.
                skipWhitespace(false);

                // Scans over any identifier at the start of the line as a side effect.
                if (hasHereMarker(identStart, identLength)) {
                    break;
                }

                // Not the marker line: it belongs to the string body.
                skipLine(false);
                lastLine++;
                lastLinePosition = position;
                stringEnd = position;
            }

            // notify last line information
            lir.lineInfo(lastLine, lastLinePosition);

            // Record end of string.
            stringState.setLimit(stringEnd);

            // If marker is missing.
            // An empty body, or being at EOF at this point, is also reported as a missing marker.
            if (stringState.isEmpty() || atEOF()) {
                error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
                // Only reached if error() returns; its Javadoc says it always throws.
                restoreState(saved);

                return false;
            }

            // Remove last end of line if specified.
            if (excludeLastEOL) {
                // Handles \n.
                if (content[stringEnd - 1] == '\n') {
                    stringEnd--;
                }

                // Handles \r and \r\n.
                if (content[stringEnd - 1] == '\r') {
                    stringEnd--;
                }

                // Update end of string.
                stringState.setLimit(stringEnd);
            }

            // Edit string if appropriate.
            if (!noStringEditing && !stringState.isEmpty()) {
                editString(STRING, stringState);
            } else {
                // Add here string.
                add(STRING, stringState.position, stringState.limit);
            }

            // Scan rest of original line.
            final Lexer restLexer = new Lexer(this, restState);

            restLexer.lexify();

            return true;
        }

        return false;
    }
1552 
    /**
     * Breaks source content down into lex units, adding tokens to the token
     * stream. The routine scans until the stream buffer is full. Can be called
     * repeatedly until EOF is detected.
     *
     * <p>A nested lexer ignores the full-buffer check and scans until it
     * reaches the end of its content, without adding an {@code EOF} token.
     * Scanning also pauses after an operator token that could start a literal,
     * and after a left brace when {@code pauseOnNextLeftBrace} is set.
     */
    public void lexify() {
        while (!stream.isFull() || nested) {
            // Skip over whitespace.
            skipWhitespace(true);

            // Detect end of file.
            if (atEOF()) {
                if (!nested) {
                    // Add an EOF token at the end.
                    add(EOF, position);
                }

                break;
            }

            // Check for comments. Note that we don't scan for regexp and other literals here as
            // we may not have enough context to distinguish them from similar looking operators.
            // Instead we break on ambiguous operators below and let the parser decide.
            if (ch0 == '/' && skipComments()) {
                continue;
            }

            // '#' is only tried as a comment start in scripting mode.
            if (scripting && ch0 == '#' && skipComments()) {
                continue;
            }

            // TokenType for lookup of delimiter or operator.
            TokenType type;

            // The order of the tests below matters: ".5" has to be tried as a
            // number before '.' is looked up as an operator.
            if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
                // '.' followed by digit.
                // Scan and add a number.
                scanNumber();
            } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
                // Get the number of characters in the token.
                final int typeLength = type.getLength();
                // Skip that many characters.
                skip(typeLength);
                // Add operator token.
                add(type, position - typeLength);
                // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
                // We break to let the parser decide what it is.
                if (canStartLiteral(type)) {
                    break;
                } else if (type == LBRACE && pauseOnNextLeftBrace) {
                    // One-shot pause: clear the flag before stopping.
                    pauseOnNextLeftBrace = false;
                    break;
                }
            } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
                // Scan and add identifier or keyword.
                scanIdentifierOrKeyword();
            } else if (isStringDelimiter(ch0)) {
                // Scan and add a string.
                scanString(true);
            } else if (Character.isDigit(ch0)) {
                // Scan and add a number.
                scanNumber();
            } else {
                // Don't recognize this character: add it as a one character ERROR token.
                skip(1);
                add(ERROR, position - 1);
            }
        }
    }
1622 
1623     /**
1624      * Return value of token given its token descriptor.
1625      *
1626      * @param token  Token descriptor.
1627      * @return JavaScript value.
1628      */
1629     Object getValueOf(final long token, final boolean strict) {
1630         final int start = Token.descPosition(token);
1631         final int len   = Token.descLength(token);
1632 
1633         switch (Token.descType(token)) {
1634         case DECIMAL:
1635             return Lexer.valueOf(source.getString(start, len), 10); // number
1636         case OCTAL:
1637             return Lexer.valueOf(source.getString(start, len), 8); // number
1638         case HEXADECIMAL:
1639             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1640         case FLOATING:
1641             final String str   = source.getString(start, len);
1642             final double value = Double.valueOf(str);
1643             if (str.indexOf('.') != -1) {
1644                 return value; //number
1645             }
1646             //anything without an explicit decimal point is still subject to a
1647             //"representable as int or long" check. Then the programmer does not
1648             //explicitly code something as a double. For example new Color(int, int, int)
1649             //and new Color(float, float, float) will get ambiguous for cases like
1650             //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1651             //yet we don't want e.g. 1e6 to be a double unnecessarily
1652             if (JSType.isStrictlyRepresentableAsInt(value)) {
1653                 return (int)value;
1654             }
1655             return value;
1656         case STRING:
1657             return source.getString(start, len); // String
1658         case ESCSTRING:
1659             return valueOfString(start, len, strict); // String
1660         case IDENT:
1661             return valueOfIdent(start, len); // String
1662         case REGEX:
1663             return valueOfPattern(start, len); // RegexToken::LexerToken
1664         case XML:
1665             return valueOfXML(start, len); // XMLToken::LexerToken
1666         case DIRECTIVE_COMMENT:
1667             return source.getString(start, len);
1668         default:
1669             break;
1670         }
1671 
1672         return null;
1673     }
1674 
    /**
     * Get the correctly localized error message for a given message id and format arguments.
     *
     * <p>The id is looked up with the prefix {@code "lexer.error."} prepended.
     *
     * @param msgId message id, without the {@code "lexer.error."} prefix
     * @param args  format arguments
     * @return message
     */
    protected static String message(final String msgId, final String... args) {
        return ECMAErrors.getMessage("lexer.error." + msgId, args);
    }
1684 
1685     /**
1686      * Generate a runtime exception
1687      *
1688      * @param message       error message
1689      * @param type          token type
1690      * @param start         start position of lexed error
1691      * @param length        length of lexed error
1692      * @throws ParserException  unconditionally
1693      */
1694     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1695         final long token     = Token.toDesc(type, start, length);
1696         final int  pos       = Token.descPosition(token);
1697         final int  lineNum   = source.getLine(pos);
1698         final int  columnNum = source.getColumn(pos);
1699         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1700         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1701     }
1702 
    /**
     * Helper class for Lexer tokens, e.g XML or RegExp tokens.
     * This is the abstract superclass; it holds the expression text shared by
     * all such tokens.
     */
    public static abstract class LexerToken implements Serializable {
        private static final long serialVersionUID = 1L;

        /** Expression text of the token. */
        private final String expression;

        /**
         * Constructor
         * @param expression token expression
         */
        protected LexerToken(final String expression) {
            this.expression = expression;
        }

        /**
         * Get the expression
         * @return expression, exactly as passed to the constructor
         */
        public String getExpression() {
            return expression;
        }
    }
1728 
1729     /**
1730      * Temporary container for regular expressions.
1731      */
1732     public static class RegexToken extends LexerToken {
1733         private static final long serialVersionUID = 1L;
1734 
1735         /** Options. */
1736         private final String options;
1737 
1738         /**
1739          * Constructor.
1740          *
1741          * @param expression  regexp expression
1742          * @param options     regexp options
1743          */
1744         public RegexToken(final String expression, final String options) {
1745             super(expression);
1746             this.options = options;
1747         }
1748 
1749         /**
1750          * Get regexp options
1751          * @return options
1752          */
1753         public String getOptions() {
1754             return options;
1755         }
1756 
1757         @Override
1758         public String toString() {
1759             return '/' + getExpression() + '/' + options;
1760         }
1761     }
1762 
    /**
     * Temporary container for XML expression. Adds nothing to
     * {@code LexerToken} beyond a public constructor.
     */
    public static class XMLToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /**
         * Constructor.
         *
         * @param expression  XML expression
         */
        public XMLToken(final String expression) {
            super(expression);
        }
    }
1778 }