1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.ADD;
  29 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
  30 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
  31 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
  32 import static jdk.nashorn.internal.parser.TokenType.EOF;
  33 import static jdk.nashorn.internal.parser.TokenType.EOL;
  34 import static jdk.nashorn.internal.parser.TokenType.ERROR;
  35 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  36 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
  37 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
  38 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
  39 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
  40 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
  41 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
  42 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
  43 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  44 import static jdk.nashorn.internal.parser.TokenType.REGEX;
  45 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
  46 import static jdk.nashorn.internal.parser.TokenType.STRING;
  47 import static jdk.nashorn.internal.parser.TokenType.XML;
  48 
  49 import java.io.Serializable;
  50 import jdk.nashorn.internal.runtime.ECMAErrors;
  51 import jdk.nashorn.internal.runtime.ErrorManager;
  52 import jdk.nashorn.internal.runtime.JSErrorType;
  53 import jdk.nashorn.internal.runtime.JSType;
  54 import jdk.nashorn.internal.runtime.ParserException;
  55 import jdk.nashorn.internal.runtime.Source;
  56 import jdk.nashorn.internal.runtime.options.Options;
  57 
  58 /**
  59  * Responsible for converting source content into a stream of tokens.
  60  *
  61  */
  62 @SuppressWarnings("fallthrough")
  63 public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} widened to {@code long}. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} widened to {@code long}. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /** Value of the {@code nashorn.lexer.xmlliterals} boolean option; lets '&lt;' start a literal even outside scripting mode. */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens; every token produced by {@code add} is put here. */
    private final TokenStream stream;

    /**
     * True if scripting mode is on. In this class it enables '`' as a string
     * delimiter, '#' shell style comments and '&lt;' as a literal start.
     */
    private final boolean scripting;

    /** True if a nested scan. (scan to completion, no EOF.) Only the private copy constructor sets this. */
    private final boolean nested;

    /**
     * Line number carried by the most recent EOL that has not been written to
     * the stream yet, or -1 when no EOL is pending.
     */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** Constructor flag: when true, seeing the {@code function} keyword arms {@link #pauseOnNextLeftBrace}. */
    private final boolean pauseOnFunctionBody;
    /** Set once a {@code function} keyword has been scanned while {@link #pauseOnFunctionBody} is true. */
    private boolean pauseOnNextLeftBrace;
  92 
    // Building blocks for the whitespace tables below.
    private static final String SPACETAB = " \t";  // ASCII space and tab
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    /** Every char treated as a line terminator; searched by {@code isJSEOL}. */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /** Every char treated as whitespace, line terminators included; searched by {@code isJSWhitespace}. */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // line tabulation (vertical tab)
        "\u000c" + // ff (ctrl-l)
        "\u00a0" + // Latin-1 space
        "\u1680" + // Ogham space mark
        "\u180e" + // separator, Mongolian vowel
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;
 125 
    /**
     * The same set of characters as {@code JAVASCRIPT_WHITESPACE}, but spelled
     * out as escape text: each entry is the six characters backslash, 'u' and
     * four hex digits rather than the character itself. Returned verbatim by
     * {@link #getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // separator, Mongolian vowel
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
 154 
 155     static String unicodeEscape(final char ch) {
 156         final StringBuilder sb = new StringBuilder();
 157 
 158         sb.append("\\u");
 159 
 160         final String hex = Integer.toHexString(ch);
 161         for (int i = hex.length(); i < 4; i++) {
 162             sb.append('0');
 163         }
 164         sb.append(hex);
 165 
 166         return sb.toString();
 167     }
 168 
    /**
     * Constructor. Delegates to {@link #Lexer(Source, TokenStream, boolean)}
     * with scripting mode switched off.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false);
    }
 178 
    /**
     * Constructor. Lexes the whole source: delegates to the full constructor
     * with start 0, length {@code source.getLength()} and no pausing on
     * function bodies.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     * @param scripting are we in scripting mode
     */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting) {
        this(source, 0, source.getLength(), stream, scripting, false);
    }
 189 
 190     /**
 191      * Constructor
 192      *
 193      * @param source    the source
 194      * @param start     start position in source from which to start lexing
 195      * @param len       length of source segment to lex
 196      * @param stream    token stream to lex
 197      * @param scripting are we in scripting mode
 198      * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
 199      * function body. This is used with the feature where the parser is skipping nested function bodies to
 200      * avoid reading ahead unnecessarily when we skip the function bodies.
 201      */
 202 
 203     public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean pauseOnFunctionBody) {
 204         super(source.getContent(), 1, start, len);
 205         this.source      = source;
 206         this.stream      = stream;
 207         this.scripting   = scripting;
 208         this.nested      = false;
 209         this.pendingLine = 1;
 210         this.last        = EOL;
 211 
 212         this.pauseOnFunctionBody = pauseOnFunctionBody;
 213     }
 214 
 215     private Lexer(final Lexer lexer, final State state) {
 216         super(lexer, state);
 217 
 218         source = lexer.source;
 219         stream = lexer.stream;
 220         scripting = lexer.scripting;
 221         nested = true;
 222 
 223         pendingLine = state.pendingLine;
 224         linePosition = state.linePosition;
 225         last = EOL;
 226         pauseOnFunctionBody = false;
 227     }
 228 
    /**
     * Snapshot of the lexer's scan state: the base scanner state plus the
     * lexer's own pending line, line position and last token type.
     */
    static class State extends Scanner.State {
        /** Pending new line number and position. */
        public final int pendingLine;

        /** Position of last EOL + 1. */
        public final int linePosition;

        /** Type of last token added. */
        public final TokenType last;

        /*
         * Constructor. The first three arguments are handed to the
         * Scanner.State constructor unchanged; the remaining three are stored
         * in the fields of the same name.
         */

        State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
            super(position, limit, line);

            this.pendingLine = pendingLine;
            this.linePosition = linePosition;
            this.last = last;
        }
    }
 251 
    /**
     * Save the state of the scan: position, limit and line together with the
     * lexer's pending line, line position and last token type.
     *
     * @return Captured state.
     */
    @Override
    State saveState() {
        return new State(position, limit, line, pendingLine, linePosition, last);
    }
 261 
 262     /**
 263      * Restore the state of the scan.
 264      *
 265      * @param state
 266      *            Captured state.
 267      */
 268     void restoreState(final State state) {
 269         super.restoreState(state);
 270 
 271         pendingLine = state.pendingLine;
 272         linePosition = state.linePosition;
 273         last = state.last;
 274     }
 275 
 276     /**
 277      * Add a new token to the stream.
 278      *
 279      * @param type
 280      *            Token type.
 281      * @param start
 282      *            Start position.
 283      * @param end
 284      *            End position.
 285      */
 286     protected void add(final TokenType type, final int start, final int end) {
 287         // Record last token.
 288         last = type;
 289 
 290         // Only emit the last EOL in a cluster.
 291         if (type == EOL) {
 292             pendingLine = end;
 293             linePosition = start;
 294         } else {
 295             // Write any pending EOL to stream.
 296             if (pendingLine != -1) {
 297                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
 298                 pendingLine = -1;
 299             }
 300 
 301             // Write token to stream.
 302             stream.put(Token.toDesc(type, start, end - start));
 303         }
 304     }
 305 
    /**
     * Add a new token to the stream, ending at the current scan position.
     *
     * @param type
     *            Token type.
     * @param start
     *            Start position.
     */
    protected void add(final TokenType type, final int start) {
        add(type, start, position);
    }
 317 
    /**
     * Return the String of valid whitespace characters for regular
     * expressions in JavaScript. Each character is given as escape text
     * (backslash, 'u', four hex digits), not as the raw character.
     * @return regexp whitespace string
     */
    public static String getWhitespaceRegExp() {
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
 326 
 327     /**
 328      * Skip end of line.
 329      *
 330      * @param addEOL true if EOL token should be recorded.
 331      */
 332     private void skipEOL(final boolean addEOL) {
 333 
 334         if (ch0 == '\r') { // detect \r\n pattern
 335             skip(1);
 336             if (ch0 == '\n') {
 337                 skip(1);
 338             }
 339         } else { // all other space, ch0 is guaranteed to be EOL or \0
 340             skip(1);
 341         }
 342 
 343         // bump up line count
 344         line++;
 345 
 346         if (addEOL) {
 347             // Add an EOL token.
 348             add(EOL, position, line);
 349         }
 350     }
 351 
 352     /**
 353      * Skip over rest of line including end of line.
 354      *
 355      * @param addEOL true if EOL token should be recorded.
 356      */
 357     private void skipLine(final boolean addEOL) {
 358         // Ignore characters.
 359         while (!isEOL(ch0) && !atEOF()) {
 360             skip(1);
 361         }
 362         // Skip over end of line.
 363         skipEOL(addEOL);
 364     }
 365 
 366     /**
 367      * Test whether a char is valid JavaScript whitespace
 368      * @param ch a char
 369      * @return true if valid JavaScript whitespace
 370      */
 371     public static boolean isJSWhitespace(final char ch) {
 372         return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
 373     }
 374 
 375     /**
 376      * Test whether a char is valid JavaScript end of line
 377      * @param ch a char
 378      * @return true if valid JavaScript end of line
 379      */
 380     public static boolean isJSEOL(final char ch) {
 381         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
 382     }
 383 
 384     /**
 385      * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
 386      * strings ('`') in scripting mode.
 387      * @param ch a char
 388      * @return true if string delimiter
 389      */
 390     protected boolean isStringDelimiter(final char ch) {
 391         return ch == '\'' || ch == '"' || (scripting && ch == '`');
 392     }
 393 
    /**
     * Test whether a char is valid JavaScript whitespace. Instance-level,
     * overridable counterpart of {@link #isJSWhitespace(char)}, to which this
     * implementation delegates.
     * @param ch a char
     * @return true if valid JavaScript whitespace
     */
    protected boolean isWhitespace(final char ch) {
        return Lexer.isJSWhitespace(ch);
    }
 402 
    /**
     * Test whether a char is valid JavaScript end of line. Instance-level,
     * overridable counterpart of {@link #isJSEOL(char)}, to which this
     * implementation delegates.
     * @param ch a char
     * @return true if valid JavaScript end of line
     */
    protected boolean isEOL(final char ch) {
        return Lexer.isJSEOL(ch);
    }
 411 
 412     /**
 413      * Skip over whitespace and detect end of line, adding EOL tokens if
 414      * encountered.
 415      *
 416      * @param addEOL true if EOL tokens should be recorded.
 417      */
 418     private void skipWhitespace(final boolean addEOL) {
 419         while (isWhitespace(ch0)) {
 420             if (isEOL(ch0)) {
 421                 skipEOL(addEOL);
 422             } else {
 423                 skip(1);
 424             }
 425         }
 426     }
 427 
 428     /**
 429      * Skip over comments.
 430      *
 431      * @return True if a comment.
 432      */
 433     protected boolean skipComments() {
 434         // Save the current position.
 435         final int start = position;
 436 
 437         if (ch0 == '/') {
 438             // Is it a // comment.
 439             if (ch1 == '/') {
 440                 // Skip over //.
 441                 skip(2);
 442 
 443                 boolean directiveComment = false;
 444                 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
 445                     directiveComment = true;
 446                 }
 447 
 448                 // Scan for EOL.
 449                 while (!atEOF() && !isEOL(ch0)) {
 450                     skip(1);
 451                 }
 452                 // Did detect a comment.
 453                 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
 454                 return true;
 455             } else if (ch1 == '*') {
 456                 // Skip over /*.
 457                 skip(2);
 458                 // Scan for */.
 459                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
 460                     // If end of line handle else skip character.
 461                     if (isEOL(ch0)) {
 462                         skipEOL(true);
 463                     } else {
 464                         skip(1);
 465                     }
 466                 }
 467 
 468                 if (atEOF()) {
 469                     // TODO - Report closing */ missing in parser.
 470                     add(ERROR, start);
 471                 } else {
 472                     // Skip */.
 473                     skip(2);
 474                 }
 475 
 476                 // Did detect a comment.
 477                 add(COMMENT, start);
 478                 return true;
 479             }
 480         } else if (ch0 == '#') {
 481             assert scripting;
 482             // shell style comment
 483             // Skip over #.
 484             skip(1);
 485             // Scan for EOL.
 486             while (!atEOF() && !isEOL(ch0)) {
 487                 skip(1);
 488             }
 489             // Did detect a comment.
 490             add(COMMENT, start);
 491             return true;
 492         }
 493 
 494         // Not a comment.
 495         return false;
 496     }
 497 
 498     /**
 499      * Convert a regex token to a token object.
 500      *
 501      * @param start  Position in source content.
 502      * @param length Length of regex token.
 503      * @return Regex token object.
 504      */
 505     public RegexToken valueOfPattern(final int start, final int length) {
 506         // Save the current position.
 507         final int savePosition = position;
 508         // Reset to beginning of content.
 509         reset(start);
 510         // Buffer for recording characters.
 511         final StringBuilder sb = new StringBuilder(length);
 512 
 513         // Skip /.
 514         skip(1);
 515         boolean inBrackets = false;
 516         // Scan for closing /, stopping at end of line.
 517         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
 518             // Skip over escaped character.
 519             if (ch0 == '\\') {
 520                 sb.append(ch0);
 521                 sb.append(ch1);
 522                 skip(2);
 523             } else {
 524                 if (ch0 == '[') {
 525                     inBrackets = true;
 526                 } else if (ch0 == ']') {
 527                     inBrackets = false;
 528                 }
 529 
 530                 // Skip literal character.
 531                 sb.append(ch0);
 532                 skip(1);
 533             }
 534         }
 535 
 536         // Get pattern as string.
 537         final String regex = sb.toString();
 538 
 539         // Skip /.
 540         skip(1);
 541 
 542         // Options as string.
 543         final String options = source.getString(position, scanIdentifier());
 544 
 545         reset(savePosition);
 546 
 547         // Compile the pattern.
 548         return new RegexToken(regex, options);
 549     }
 550 
 551     /**
 552      * Return true if the given token can be the beginning of a literal.
 553      *
 554      * @param token a token
 555      * @return true if token can start a literal.
 556      */
 557     public boolean canStartLiteral(final TokenType token) {
 558         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
 559     }
 560 
 561     /**
 562      * interface to receive line information for multi-line literals.
 563      */
 564     protected interface LineInfoReceiver {
 565         /**
 566          * Receives line information
 567          * @param line last line number
 568          * @param linePosition position of last line
 569          */
 570         public void lineInfo(int line, int linePosition);
 571     }
 572 
 573     /**
 574      * Check whether the given token represents the beginning of a literal. If so scan
 575      * the literal and return <tt>true</tt>, otherwise return false.
 576      *
 577      * @param token the token.
 578      * @param startTokenType the token type.
 579      * @param lir LineInfoReceiver that receives line info for multi-line string literals.
 580      * @return True if a literal beginning with startToken was found and scanned.
 581      */
 582     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
 583         // Check if it can be a literal.
 584         if (!canStartLiteral(startTokenType)) {
 585             return false;
 586         }
 587         // We break on ambiguous tokens so if we already moved on it can't be a literal.
 588         if (stream.get(stream.last()) != token) {
 589             return false;
 590         }
 591         // Rewind to token start position
 592         reset(Token.descPosition(token));
 593 
 594         if (ch0 == '/') {
 595             return scanRegEx();
 596         } else if (ch0 == '<') {
 597             if (ch1 == '<') {
 598                 return scanHereString(lir);
 599             } else if (Character.isJavaIdentifierStart(ch1)) {
 600                 return scanXMLLiteral();
 601             }
 602         }
 603 
 604         return false;
 605     }
 606 
 607     /**
 608      * Scan over regex literal.
 609      *
 610      * @return True if a regex literal.
 611      */
 612     private boolean scanRegEx() {
 613         assert ch0 == '/';
 614         // Make sure it's not a comment.
 615         if (ch1 != '/' && ch1 != '*') {
 616             // Record beginning of literal.
 617             final int start = position;
 618             // Skip /.
 619             skip(1);
 620             boolean inBrackets = false;
 621 
 622             // Scan for closing /, stopping at end of line.
 623             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
 624                 // Skip over escaped character.
 625                 if (ch0 == '\\') {
 626                     skip(1);
 627                     if (isEOL(ch0)) {
 628                         reset(start);
 629                         return false;
 630                     }
 631                     skip(1);
 632                 } else {
 633                     if (ch0 == '[') {
 634                         inBrackets = true;
 635                     } else if (ch0 == ']') {
 636                         inBrackets = false;
 637                     }
 638 
 639                     // Skip literal character.
 640                     skip(1);
 641                 }
 642             }
 643 
 644             // If regex literal.
 645             if (ch0 == '/') {
 646                 // Skip /.
 647                 skip(1);
 648 
 649                 // Skip over options.
 650                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
 651                     skip(1);
 652                 }
 653 
 654                 // Add regex token.
 655                 add(REGEX, start);
 656                 // Regex literal detected.
 657                 return true;
 658             }
 659 
 660             // False start try again.
 661             reset(start);
 662         }
 663 
 664         // Regex literal not detected.
 665         return false;
 666     }
 667 
 668     /**
 669      * Convert a digit to a integer.  Can't use Character.digit since we are
 670      * restricted to ASCII by the spec.
 671      *
 672      * @param ch   Character to convert.
 673      * @param base Numeric base.
 674      *
 675      * @return The converted digit or -1 if invalid.
 676      */
 677     protected static int convertDigit(final char ch, final int base) {
 678         int digit;
 679 
 680         if ('0' <= ch && ch <= '9') {
 681             digit = ch - '0';
 682         } else if ('A' <= ch && ch <= 'Z') {
 683             digit = ch - 'A' + 10;
 684         } else if ('a' <= ch && ch <= 'z') {
 685             digit = ch - 'a' + 10;
 686         } else {
 687             return -1;
 688         }
 689 
 690         return digit < base ? digit : -1;
 691     }
 692 
 693 
 694     /**
 695      * Get the value of a hexadecimal numeric sequence.
 696      *
 697      * @param length Number of digits.
 698      * @param type   Type of token to report against.
 699      * @return Value of sequence or < 0 if no digits.
 700      */
 701     private int hexSequence(final int length, final TokenType type) {
 702         int value = 0;
 703 
 704         for (int i = 0; i < length; i++) {
 705             final int digit = convertDigit(ch0, 16);
 706 
 707             if (digit == -1) {
 708                 error(Lexer.message("invalid.hex"), type, position, limit);
 709                 return i == 0 ? -1 : value;
 710             }
 711 
 712             value = digit | value << 4;
 713             skip(1);
 714         }
 715 
 716         return value;
 717     }
 718 
 719     /**
 720      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
 721      *
 722      * @return Value of sequence.
 723      */
 724     private int octalSequence() {
 725         int value = 0;
 726 
 727         for (int i = 0; i < 3; i++) {
 728             final int digit = convertDigit(ch0, 8);
 729 
 730             if (digit == -1) {
 731                 break;
 732             }
 733             value = digit | value << 3;
 734             skip(1);
 735 
 736             if (i == 1 && value >= 32) {
 737                 break;
 738             }
 739         }
 740         return value;
 741     }
 742 
 743     /**
 744      * Convert a string to a JavaScript identifier.
 745      *
 746      * @param start  Position in source content.
 747      * @param length Length of token.
 748      * @return Ident string or null if an error.
 749      */
 750     private String valueOfIdent(final int start, final int length) throws RuntimeException {
 751         // Save the current position.
 752         final int savePosition = position;
 753         // End of scan.
 754         final int end = start + length;
 755         // Reset to beginning of content.
 756         reset(start);
 757         // Buffer for recording characters.
 758         final StringBuilder sb = new StringBuilder(length);
 759 
 760         // Scan until end of line or end of file.
 761         while (!atEOF() && position < end && !isEOL(ch0)) {
 762             // If escape character.
 763             if (ch0 == '\\' && ch1 == 'u') {
 764                 skip(2);
 765                 final int ch = hexSequence(4, TokenType.IDENT);
 766                 if (isWhitespace((char)ch)) {
 767                     return null;
 768                 }
 769                 if (ch < 0) {
 770                     sb.append('\\');
 771                     sb.append('u');
 772                 } else {
 773                     sb.append((char)ch);
 774                 }
 775             } else {
 776                 // Add regular character.
 777                 sb.append(ch0);
 778                 skip(1);
 779             }
 780         }
 781 
 782         // Restore position.
 783         reset(savePosition);
 784 
 785         return sb.toString();
 786     }
 787 
 788     /**
 789      * Scan over and identifier or keyword. Handles identifiers containing
 790      * encoded Unicode chars.
 791      *
 792      * Example:
 793      *
 794      * var \u0042 = 44;
 795      */
 796     private void scanIdentifierOrKeyword() {
 797         // Record beginning of identifier.
 798         final int start = position;
 799         // Scan identifier.
 800         final int length = scanIdentifier();
 801         // Check to see if it is a keyword.
 802         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
 803         if (type == FUNCTION && pauseOnFunctionBody) {
 804             pauseOnNextLeftBrace = true;
 805         }
 806         // Add keyword or identifier token.
 807         add(type, start);
 808     }
 809 
 810     /**
 811      * Convert a string to a JavaScript string object.
 812      *
 813      * @param start  Position in source content.
 814      * @param length Length of token.
 815      * @return JavaScript string object.
 816      */
 817     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
 818         // Save the current position.
 819         final int savePosition = position;
 820         // Calculate the end position.
 821         final int end = start + length;
 822         // Reset to beginning of string.
 823         reset(start);
 824 
 825         // Buffer for recording characters.
 826         final StringBuilder sb = new StringBuilder(length);
 827 
 828         // Scan until end of string.
 829         while (position < end) {
 830             // If escape character.
 831             if (ch0 == '\\') {
 832                 skip(1);
 833 
 834                 final char next = ch0;
 835                 final int afterSlash = position;
 836 
 837                 skip(1);
 838 
 839                 // Special characters.
 840                 switch (next) {
 841                 case '0':
 842                 case '1':
 843                 case '2':
 844                 case '3':
 845                 case '4':
 846                 case '5':
 847                 case '6':
 848                 case '7': {
 849                     if (strict) {
 850                         // "\0" itself is allowed in strict mode. Only other 'real'
 851                         // octal escape sequences are not allowed (eg. "\02", "\31").
 852                         // See section 7.8.4 String literals production EscapeSequence
 853                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
 854                             error(Lexer.message("strict.no.octal"), STRING, position, limit);
 855                         }
 856                     }
 857                     reset(afterSlash);
 858                     // Octal sequence.
 859                     final int ch = octalSequence();
 860 
 861                     if (ch < 0) {
 862                         sb.append('\\');
 863                         sb.append('x');
 864                     } else {
 865                         sb.append((char)ch);
 866                     }
 867                     break;
 868                 }
 869                 case 'n':
 870                     sb.append('\n');
 871                     break;
 872                 case 't':
 873                     sb.append('\t');
 874                     break;
 875                 case 'b':
 876                     sb.append('\b');
 877                     break;
 878                 case 'f':
 879                     sb.append('\f');
 880                     break;
 881                 case 'r':
 882                     sb.append('\r');
 883                     break;
 884                 case '\'':
 885                     sb.append('\'');
 886                     break;
 887                 case '\"':
 888                     sb.append('\"');
 889                     break;
 890                 case '\\':
 891                     sb.append('\\');
 892                     break;
 893                 case '\r': // CR | CRLF
 894                     if (ch0 == '\n') {
 895                         skip(1);
 896                     }
 897                     // fall through
 898                 case '\n': // LF
 899                 case '\u2028': // LS
 900                 case '\u2029': // PS
 901                     // continue on the next line, slash-return continues string
 902                     // literal
 903                     break;
 904                 case 'x': {
 905                     // Hex sequence.
 906                     final int ch = hexSequence(2, STRING);
 907 
 908                     if (ch < 0) {
 909                         sb.append('\\');
 910                         sb.append('x');
 911                     } else {
 912                         sb.append((char)ch);
 913                     }
 914                 }
 915                     break;
 916                 case 'u': {
 917                     // Unicode sequence.
 918                     final int ch = hexSequence(4, STRING);
 919 
 920                     if (ch < 0) {
 921                         sb.append('\\');
 922                         sb.append('u');
 923                     } else {
 924                         sb.append((char)ch);
 925                     }
 926                 }
 927                     break;
 928                 case 'v':
 929                     sb.append('\u000B');
 930                     break;
 931                 // All other characters.
 932                 default:
 933                     sb.append(next);
 934                     break;
 935                 }
 936             } else {
 937                 // Add regular character.
 938                 sb.append(ch0);
 939                 skip(1);
 940             }
 941         }
 942 
 943         // Restore position.
 944         reset(savePosition);
 945 
 946         return sb.toString();
 947     }
 948 
 949     /**
 950      * Scan over a string literal.
 951      * @param add true if we nare not just scanning but should actually modify the token stream
 952      */
 953     protected void scanString(final boolean add) {
 954         // Type of string.
 955         TokenType type = STRING;
 956         // Record starting quote.
 957         final char quote = ch0;
 958         // Skip over quote.
 959         skip(1);
 960 
 961         // Record beginning of string content.
 962         final State stringState = saveState();
 963 
 964         // Scan until close quote or end of line.
 965         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
 966             // Skip over escaped character.
 967             if (ch0 == '\\') {
 968                 type = ESCSTRING;
 969                 skip(1);
 970                 if (! isEscapeCharacter(ch0)) {
 971                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);
 972                 }
 973                 if (isEOL(ch0)) {
 974                     // Multiline string literal
 975                     skipEOL(false);
 976                     continue;
 977                 }
 978             }
 979             // Skip literal character.
 980             skip(1);
 981         }
 982 
 983         // If close quote.
 984         if (ch0 == quote) {
 985             // Skip close quote.
 986             skip(1);
 987         } else {
 988             error(Lexer.message("missing.close.quote"), STRING, position, limit);
 989         }
 990 
 991         // If not just scanning.
 992         if (add) {
 993             // Record end of string.
 994             stringState.setLimit(position - 1);
 995 
 996             if (scripting && !stringState.isEmpty()) {
 997                 switch (quote) {
 998                 case '`':
 999                     // Mark the beginning of an exec string.
1000                     add(EXECSTRING, stringState.position, stringState.limit);
1001                     // Frame edit string with left brace.
1002                     add(LBRACE, stringState.position, stringState.position);
1003                     // Process edit string.
1004                     editString(type, stringState);
1005                     // Frame edit string with right brace.
1006                     add(RBRACE, stringState.limit, stringState.limit);
1007                     break;
1008                 case '"':
1009                     // Only edit double quoted strings.
1010                     editString(type, stringState);
1011                     break;
1012                 case '\'':
1013                     // Add string token without editing.
1014                     add(type, stringState.position, stringState.limit);
1015                     break;
1016                 default:
1017                     break;
1018                 }
1019             } else {
1020                 /// Add string token without editing.
1021                 add(type, stringState.position, stringState.limit);
1022             }
1023         }
1024     }
1025 
1026     /**
1027      * Is the given character a valid escape char after "\" ?
1028      *
1029      * @param ch character to be checked
1030      * @return if the given character is valid after "\"
1031      */
1032     protected boolean isEscapeCharacter(final char ch) {
1033         return true;
1034     }
1035 
1036     /**
1037      * Convert string to number.
1038      *
1039      * @param valueString  String to convert.
1040      * @param radix        Numeric base.
1041      * @return Converted number.
1042      */
1043     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1044         try {
1045             final long value = Long.parseLong(valueString, radix);
1046             if(value >= MIN_INT_L && value <= MAX_INT_L) {
1047                 return Integer.valueOf((int)value);
1048             }
1049             return Long.valueOf(value);
1050         } catch (final NumberFormatException e) {
1051             if (radix == 10) {
1052                 return Double.valueOf(valueString);
1053             }
1054 
1055             double value = 0.0;
1056 
1057             for (int i = 0; i < valueString.length(); i++) {
1058                 final char ch = valueString.charAt(i);
1059                 // Preverified, should always be a valid digit.
1060                 final int digit = convertDigit(ch, radix);
1061                 value *= radix;
1062                 value += digit;
1063             }
1064 
1065             return value;
1066         }
1067     }
1068 
1069     /**
1070      * Scan a number.
1071      */
1072     protected void scanNumber() {
1073         // Record beginning of number.
1074         final int start = position;
1075         // Assume value is a decimal.
1076         TokenType type = DECIMAL;
1077 
1078         // First digit of number.
1079         int digit = convertDigit(ch0, 10);
1080 
1081         // If number begins with 0x.
1082         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1083             // Skip over 0xN.
1084             skip(3);
1085             // Skip over remaining digits.
1086             while (convertDigit(ch0, 16) != -1) {
1087                 skip(1);
1088             }
1089 
1090             type = HEXADECIMAL;
1091         } else {
1092             // Check for possible octal constant.
1093             boolean octal = digit == 0;
1094             // Skip first digit if not leading '.'.
1095             if (digit != -1) {
1096                 skip(1);
1097             }
1098 
1099             // Skip remaining digits.
1100             while ((digit = convertDigit(ch0, 10)) != -1) {
1101                 // Check octal only digits.
1102                 octal = octal && digit < 8;
1103                 // Skip digit.
1104                 skip(1);
1105             }
1106 
1107             if (octal && position - start > 1) {
1108                 type = OCTAL;
1109             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1110                 // Must be a double.
1111                 if (ch0 == '.') {
1112                     // Skip period.
1113                     skip(1);
1114                     // Skip mantissa.
1115                     while (convertDigit(ch0, 10) != -1) {
1116                         skip(1);
1117                     }
1118                 }
1119 
1120                 // Detect exponent.
1121                 if (ch0 == 'E' || ch0 == 'e') {
1122                     // Skip E.
1123                     skip(1);
1124                     // Detect and skip exponent sign.
1125                     if (ch0 == '+' || ch0 == '-') {
1126                         skip(1);
1127                     }
1128                     // Skip exponent.
1129                     while (convertDigit(ch0, 10) != -1) {
1130                         skip(1);
1131                     }
1132                 }
1133 
1134                 type = FLOATING;
1135             }
1136         }
1137 
1138         if (Character.isJavaIdentifierStart(ch0)) {
1139             error(Lexer.message("missing.space.after.number"), type, position, 1);
1140         }
1141 
1142         // Add number token.
1143         add(type, start);
1144     }
1145 
1146     /**
1147      * Convert a regex token to a token object.
1148      *
1149      * @param start  Position in source content.
1150      * @param length Length of regex token.
1151      * @return Regex token object.
1152      */
1153     XMLToken valueOfXML(final int start, final int length) {
1154         return new XMLToken(source.getString(start, length));
1155     }
1156 
1157     /**
1158      * Scan over a XML token.
1159      *
1160      * @return TRUE if is an XML literal.
1161      */
1162     private boolean scanXMLLiteral() {
1163         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1164         if (XML_LITERALS) {
1165             // Record beginning of xml expression.
1166             final int start = position;
1167 
1168             int openCount = 0;
1169 
1170             do {
1171                 if (ch0 == '<') {
1172                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1173                         skip(3);
1174                         openCount--;
1175                     } else if (Character.isJavaIdentifierStart(ch1)) {
1176                         skip(2);
1177                         openCount++;
1178                     } else if (ch1 == '?') {
1179                         skip(2);
1180                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1181                         skip(4);
1182                     } else {
1183                         reset(start);
1184                         return false;
1185                     }
1186 
1187                     while (!atEOF() && ch0 != '>') {
1188                         if (ch0 == '/' && ch1 == '>') {
1189                             openCount--;
1190                             skip(1);
1191                             break;
1192                         } else if (ch0 == '\"' || ch0 == '\'') {
1193                             scanString(false);
1194                         } else {
1195                             skip(1);
1196                         }
1197                     }
1198 
1199                     if (ch0 != '>') {
1200                         reset(start);
1201                         return false;
1202                     }
1203 
1204                     skip(1);
1205                 } else if (atEOF()) {
1206                     reset(start);
1207                     return false;
1208                 } else {
1209                     skip(1);
1210                 }
1211             } while (openCount > 0);
1212 
1213             add(XML, start);
1214             return true;
1215         }
1216 
1217         return false;
1218     }
1219 
1220     /**
1221      * Scan over identifier characters.
1222      *
1223      * @return Length of identifier or zero if none found.
1224      */
1225     private int scanIdentifier() {
1226         final int start = position;
1227 
1228         // Make sure first character is valid start character.
1229         if (ch0 == '\\' && ch1 == 'u') {
1230             skip(2);
1231             final int ch = hexSequence(4, TokenType.IDENT);
1232 
1233             if (!Character.isJavaIdentifierStart(ch)) {
1234                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1235             }
1236         } else if (!Character.isJavaIdentifierStart(ch0)) {
1237             // Not an identifier.
1238             return 0;
1239         }
1240 
1241         // Make sure remaining characters are valid part characters.
1242         while (!atEOF()) {
1243             if (ch0 == '\\' && ch1 == 'u') {
1244                 skip(2);
1245                 final int ch = hexSequence(4, TokenType.IDENT);
1246 
1247                 if (!Character.isJavaIdentifierPart(ch)) {
1248                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1249                 }
1250             } else if (Character.isJavaIdentifierPart(ch0)) {
1251                 skip(1);
1252             } else {
1253                 break;
1254             }
1255         }
1256 
1257         // Length of identifier sequence.
1258         return position - start;
1259     }
1260 
1261     /**
1262      * Compare two identifiers (in content) for equality.
1263      *
1264      * @param aStart  Start of first identifier.
1265      * @param aLength Length of first identifier.
1266      * @param bStart  Start of second identifier.
1267      * @param bLength Length of second identifier.
1268      * @return True if equal.
1269      */
1270     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1271         if (aLength == bLength) {
1272             for (int i = 0; i < aLength; i++) {
1273                 if (content[aStart + i] != content[bStart + i]) {
1274                     return false;
1275                 }
1276             }
1277 
1278             return true;
1279         }
1280 
1281         return false;
1282     }
1283 
1284     /**
1285      * Detect if a line starts with a marker identifier.
1286      *
1287      * @param identStart  Start of identifier.
1288      * @param identLength Length of identifier.
1289      * @return True if detected.
1290      */
1291     private boolean hasHereMarker(final int identStart, final int identLength) {
1292         // Skip any whitespace.
1293         skipWhitespace(false);
1294 
1295         return identifierEqual(identStart, identLength, position, scanIdentifier());
1296     }
1297 
1298     /**
1299      * Lexer to service edit strings.
1300      */
1301     private static class EditStringLexer extends Lexer {
1302         /** Type of string literals to emit. */
1303         final TokenType stringType;
1304 
1305         /*
1306          * Constructor.
1307          */
1308 
1309         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1310             super(lexer, stringState);
1311 
1312             this.stringType = stringType;
1313         }
1314 
1315         /**
1316          * Lexify the contents of the string.
1317          */
1318         @Override
1319         public void lexify() {
1320             // Record start of string position.
1321             int stringStart = position;
1322             // Indicate that the priming first string has not been emitted.
1323             boolean primed = false;
1324 
1325             while (true) {
1326                 // Detect end of content.
1327                 if (atEOF()) {
1328                     break;
1329                 }
1330 
1331                 // Honour escapes (should be well formed.)
1332                 if (ch0 == '\\' && stringType == ESCSTRING) {
1333                     skip(2);
1334 
1335                     continue;
1336                 }
1337 
1338                 // If start of expression.
1339                 if (ch0 == '$' && ch1 == '{') {
1340                     if (!primed || stringStart != position) {
1341                         if (primed) {
1342                             add(ADD, stringStart, stringStart + 1);
1343                         }
1344 
1345                         add(stringType, stringStart, position);
1346                         primed = true;
1347                     }
1348 
1349                     // Skip ${
1350                     skip(2);
1351 
1352                     // Save expression state.
1353                     final State expressionState = saveState();
1354 
1355                     // Start with one open brace.
1356                     int braceCount = 1;
1357 
1358                     // Scan for the rest of the string.
1359                     while (!atEOF()) {
1360                         // If closing brace.
1361                         if (ch0 == '}') {
1362                             // Break only only if matching brace.
1363                             if (--braceCount == 0) {
1364                                 break;
1365                             }
1366                         } else if (ch0 == '{') {
1367                             // Bump up the brace count.
1368                             braceCount++;
1369                         }
1370 
1371                         // Skip to next character.
1372                         skip(1);
1373                     }
1374 
1375                     // If braces don't match then report an error.
1376                     if (braceCount != 0) {
1377                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1378                     }
1379 
1380                     // Mark end of expression.
1381                     expressionState.setLimit(position);
1382                     // Skip closing brace.
1383                     skip(1);
1384 
1385                     // Start next string.
1386                     stringStart = position;
1387 
1388                     // Concatenate expression.
1389                     add(ADD, expressionState.position, expressionState.position + 1);
1390                     add(LPAREN, expressionState.position, expressionState.position + 1);
1391 
1392                     // Scan expression.
1393                     final Lexer lexer = new Lexer(this, expressionState);
1394                     lexer.lexify();
1395 
1396                     // Close out expression parenthesis.
1397                     add(RPAREN, position - 1, position);
1398 
1399                     continue;
1400                 }
1401 
1402                 // Next character in string.
1403                 skip(1);
1404             }
1405 
1406             // If there is any unemitted string portion.
1407             if (stringStart != limit) {
1408                 // Concatenate remaining string.
1409                 if (primed) {
1410                     add(ADD, stringStart, 1);
1411                 }
1412 
1413                 add(stringType, stringStart, limit);
1414             }
1415         }
1416 
1417     }
1418 
1419     /**
1420      * Edit string for nested expressions.
1421      *
1422      * @param stringType  Type of string literals to emit.
1423      * @param stringState State of lexer at start of string.
1424      */
1425     private void editString(final TokenType stringType, final State stringState) {
1426         // Use special lexer to scan string.
1427         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1428         lexer.lexify();
1429 
1430         // Need to keep lexer informed.
1431         last = stringType;
1432     }
1433 
1434     /**
1435      * Scan over a here string.
1436      *
1437      * @return TRUE if is a here string.
1438      */
1439     private boolean scanHereString(final LineInfoReceiver lir) {
1440         assert ch0 == '<' && ch1 == '<';
1441         if (scripting) {
1442             // Record beginning of here string.
1443             final State saved = saveState();
1444 
1445             // << or <<<
1446             final boolean excludeLastEOL = ch2 != '<';
1447 
1448             if (excludeLastEOL) {
1449                 skip(2);
1450             } else {
1451                 skip(3);
1452             }
1453 
1454             // Scan identifier.
1455             final int identStart = position;
1456             final int identLength = scanIdentifier();
1457 
1458             // Check for identifier.
1459             if (identLength == 0) {
1460                 // Treat as shift.
1461                 restoreState(saved);
1462 
1463                 return false;
1464             }
1465 
1466             // Record rest of line.
1467             final State restState = saveState();
1468             // keep line number updated
1469             int lastLine = line;
1470 
1471             skipLine(false);
1472             lastLine++;
1473             int lastLinePosition = position;
1474             restState.setLimit(position);
1475 
1476             // Record beginning of string.
1477             final State stringState = saveState();
1478             int stringEnd = position;
1479 
1480             // Hunt down marker.
1481             while (!atEOF()) {
1482                 // Skip any whitespace.
1483                 skipWhitespace(false);
1484 
1485                 if (hasHereMarker(identStart, identLength)) {
1486                     break;
1487                 }
1488 
1489                 skipLine(false);
1490                 lastLine++;
1491                 lastLinePosition = position;
1492                 stringEnd = position;
1493             }
1494 
1495             // notify last line information
1496             lir.lineInfo(lastLine, lastLinePosition);
1497 
1498             // Record end of string.
1499             stringState.setLimit(stringEnd);
1500 
1501             // If marker is missing.
1502             if (stringState.isEmpty() || atEOF()) {
1503                 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1504                 restoreState(saved);
1505 
1506                 return false;
1507             }
1508 
1509             // Remove last end of line if specified.
1510             if (excludeLastEOL) {
1511                 // Handles \n.
1512                 if (content[stringEnd - 1] == '\n') {
1513                     stringEnd--;
1514                 }
1515 
1516                 // Handles \r and \r\n.
1517                 if (content[stringEnd - 1] == '\r') {
1518                     stringEnd--;
1519                 }
1520 
1521                 // Update end of string.
1522                 stringState.setLimit(stringEnd);
1523             }
1524 
1525             // Edit string if appropriate.
1526             if (scripting && !stringState.isEmpty()) {
1527                 editString(STRING, stringState);
1528             } else {
1529                 // Add here string.
1530                 add(STRING, stringState.position, stringState.limit);
1531             }
1532 
1533             // Scan rest of original line.
1534             final Lexer restLexer = new Lexer(this, restState);
1535 
1536             restLexer.lexify();
1537 
1538             return true;
1539         }
1540 
1541         return false;
1542     }
1543 
1544     /**
1545      * Breaks source content down into lex units, adding tokens to the token
1546      * stream. The routine scans until the stream buffer is full. Can be called
1547      * repeatedly until EOF is detected.
1548      */
1549     public void lexify() {
1550         while (!stream.isFull() || nested) {
1551             // Skip over whitespace.
1552             skipWhitespace(true);
1553 
1554             // Detect end of file.
1555             if (atEOF()) {
1556                 if (!nested) {
1557                     // Add an EOF token at the end.
1558                     add(EOF, position);
1559                 }
1560 
1561                 break;
1562             }
1563 
1564             // Check for comments. Note that we don't scan for regexp and other literals here as
1565             // we may not have enough context to distinguish them from similar looking operators.
1566             // Instead we break on ambiguous operators below and let the parser decide.
1567             if (ch0 == '/' && skipComments()) {
1568                 continue;
1569             }
1570 
1571             if (scripting && ch0 == '#' && skipComments()) {
1572                 continue;
1573             }
1574 
1575             // TokenType for lookup of delimiter or operator.
1576             TokenType type;
1577 
1578             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1579                 // '.' followed by digit.
1580                 // Scan and add a number.
1581                 scanNumber();
1582             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1583                 // Get the number of characters in the token.
1584                 final int typeLength = type.getLength();
1585                 // Skip that many characters.
1586                 skip(typeLength);
1587                 // Add operator token.
1588                 add(type, position - typeLength);
1589                 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1590                 // We break to let the parser decide what it is.
1591                 if (canStartLiteral(type)) {
1592                     break;
1593                 } else if (type == LBRACE && pauseOnNextLeftBrace) {
1594                     pauseOnNextLeftBrace = false;
1595                     break;
1596                 }
1597             } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1598                 // Scan and add identifier or keyword.
1599                 scanIdentifierOrKeyword();
1600             } else if (isStringDelimiter(ch0)) {
1601                 // Scan and add a string.
1602                 scanString(true);
1603             } else if (Character.isDigit(ch0)) {
1604                 // Scan and add a number.
1605                 scanNumber();
1606             } else {
1607                 // Don't recognize this character.
1608                 skip(1);
1609                 add(ERROR, position - 1);
1610             }
1611         }
1612     }
1613 
1614     /**
1615      * Return value of token given its token descriptor.
1616      *
1617      * @param token  Token descriptor.
1618      * @return JavaScript value.
1619      */
1620     Object getValueOf(final long token, final boolean strict) {
1621         final int start = Token.descPosition(token);
1622         final int len   = Token.descLength(token);
1623 
1624         switch (Token.descType(token)) {
1625         case DECIMAL:
1626             return Lexer.valueOf(source.getString(start, len), 10); // number
1627         case OCTAL:
1628             return Lexer.valueOf(source.getString(start, len), 8); // number
1629         case HEXADECIMAL:
1630             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1631         case FLOATING:
1632             final String str   = source.getString(start, len);
1633             final double value = Double.valueOf(str);
1634             if (str.indexOf('.') != -1) {
1635                 return value; //number
1636             }
1637             //anything without an explicit decimal point is still subject to a
1638             //"representable as int or long" check. Then the programmer does not
1639             //explicitly code something as a double. For example new Color(int, int, int)
1640             //and new Color(float, float, float) will get ambiguous for cases like
1641             //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1642             //yet we don't want e.g. 1e6 to be a double unnecessarily
1643             if (JSType.isRepresentableAsInt(value) && !JSType.isNegativeZero(value)) {
1644                 return (int)value;
1645             } else if (JSType.isRepresentableAsLong(value) && !JSType.isNegativeZero(value)) {
1646                 return (long)value;
1647             }
1648             return value;
1649         case STRING:
1650             return source.getString(start, len); // String
1651         case ESCSTRING:
1652             return valueOfString(start, len, strict); // String
1653         case IDENT:
1654             return valueOfIdent(start, len); // String
1655         case REGEX:
1656             return valueOfPattern(start, len); // RegexToken::LexerToken
1657         case XML:
1658             return valueOfXML(start, len); // XMLToken::LexerToken
1659         case DIRECTIVE_COMMENT:
1660             return source.getString(start, len);
1661         default:
1662             break;
1663         }
1664 
1665         return null;
1666     }
1667 
1668     /**
1669      * Get the correctly localized error message for a given message id format arguments
1670      * @param msgId message id
1671      * @param args  format arguments
1672      * @return message
1673      */
1674     protected static String message(final String msgId, final String... args) {
1675         return ECMAErrors.getMessage("lexer.error." + msgId, args);
1676     }
1677 
1678     /**
1679      * Generate a runtime exception
1680      *
1681      * @param message       error message
1682      * @param type          token type
1683      * @param start         start position of lexed error
1684      * @param length        length of lexed error
1685      * @throws ParserException  unconditionally
1686      */
1687     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1688         final long token     = Token.toDesc(type, start, length);
1689         final int  pos       = Token.descPosition(token);
1690         final int  lineNum   = source.getLine(pos);
1691         final int  columnNum = source.getColumn(pos);
1692         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1693         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1694     }
1695 
1696     /**
1697      * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1698      * This is the abstract superclass
1699      */
1700     public static abstract class LexerToken implements Serializable {
1701         private static final long serialVersionUID = 1L;
1702 
1703         private final String expression;
1704 
1705         /**
1706          * Constructor
1707          * @param expression token expression
1708          */
1709         protected LexerToken(final String expression) {
1710             this.expression = expression;
1711         }
1712 
1713         /**
1714          * Get the expression
1715          * @return expression
1716          */
1717         public String getExpression() {
1718             return expression;
1719         }
1720     }
1721 
1722     /**
1723      * Temporary container for regular expressions.
1724      */
1725     public static class RegexToken extends LexerToken {
1726         private static final long serialVersionUID = 1L;
1727 
1728         /** Options. */
1729         private final String options;
1730 
1731         /**
1732          * Constructor.
1733          *
1734          * @param expression  regexp expression
1735          * @param options     regexp options
1736          */
1737         public RegexToken(final String expression, final String options) {
1738             super(expression);
1739             this.options = options;
1740         }
1741 
1742         /**
1743          * Get regexp options
1744          * @return options
1745          */
1746         public String getOptions() {
1747             return options;
1748         }
1749 
1750         @Override
1751         public String toString() {
1752             return '/' + getExpression() + '/' + options;
1753         }
1754     }
1755 
1756     /**
1757      * Temporary container for XML expression.
1758      */
1759     public static class XMLToken extends LexerToken {
1760         private static final long serialVersionUID = 1L;
1761 
1762         /**
1763          * Constructor.
1764          *
1765          * @param expression  XML expression
1766          */
1767         public XMLToken(final String expression) {
1768             super(expression);
1769         }
1770     }
1771 }