1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.ADD;
  29 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
  30 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
  31 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
  32 import static jdk.nashorn.internal.parser.TokenType.EOF;
  33 import static jdk.nashorn.internal.parser.TokenType.EOL;
  34 import static jdk.nashorn.internal.parser.TokenType.ERROR;
  35 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  36 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
  37 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
  38 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
  39 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
  40 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
  41 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
  42 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
  43 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  44 import static jdk.nashorn.internal.parser.TokenType.REGEX;
  45 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
  46 import static jdk.nashorn.internal.parser.TokenType.STRING;
  47 import static jdk.nashorn.internal.parser.TokenType.XML;
  48 
  49 import java.io.Serializable;
  50 import jdk.nashorn.internal.runtime.ECMAErrors;
  51 import jdk.nashorn.internal.runtime.ErrorManager;
  52 import jdk.nashorn.internal.runtime.JSErrorType;
  53 import jdk.nashorn.internal.runtime.JSType;
  54 import jdk.nashorn.internal.runtime.ParserException;
  55 import jdk.nashorn.internal.runtime.Source;
  56 import jdk.nashorn.internal.runtime.options.Options;
  57 
  58 /**
  59  * Responsible for converting source content into a stream of tokens.
  60  *
  61  */
  62 @SuppressWarnings("fallthrough")
  63 public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} widened to {@code long}. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} widened to {@code long}. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /**
     * Value of the {@code nashorn.lexer.xmlliterals} boolean option. When true,
     * a token starting with {@code '<'} may begin a literal even when scripting
     * mode is off (see {@link #canStartLiteral(TokenType)}).
     */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");
  68 
    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens. */
    private final TokenStream stream;

    /**
     * True if here and edit strings are supported (scripting mode). In this
     * mode the back quote is also accepted as a string delimiter.
     */
    private final boolean scripting;

    /** True if a nested scan. (scan to completion, no EOF.) */
    private final boolean nested;

    /**
     * Line number carried by the EOL token that is waiting to be written to
     * the stream, or -1 when no EOL is pending.
     */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** True if the lexer was constructed with the request to pause at function bodies. */
    private final boolean pauseOnFunctionBody;
    /** Set to true when a {@code function} keyword is scanned while {@link #pauseOnFunctionBody} is true. */
    private boolean pauseOnNextLeftBrace;
  92 
    private static final String SPACETAB = " \t";  // ASCII space and tab
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    /** Characters that end a line in JSON text. */
    private static final String JSON_WHITESPACE_EOL = LFCR;
    /** Every character treated as whitespace in JSON text (includes the line terminators). */
    private static final String JSON_WHITESPACE     = SPACETAB + LFCR;

    /** Characters that end a line in JavaScript source. */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /** Every character treated as whitespace in JavaScript source (includes the line terminators). */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // line tabulation (vertical tab)
        "\u000c" + // ff (ctrl-l)
        "\u00a0" + // Latin-1 space
        "\u1680" + // Ogham space mark
        "\u180e" + // Mongolian vowel separator
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;

    /**
     * The same characters as {@link #JAVASCRIPT_WHITESPACE}, but spelled out as
     * escape-sequence text (a literal backslash, the letter u and four hex
     * digits per character) rather than as the characters themselves. This
     * list is maintained by hand and must be kept in step with the one above.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // Mongolian vowel separator
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
 157 
 158     static String unicodeEscape(final char ch) {
 159         final StringBuilder sb = new StringBuilder();
 160 
 161         sb.append("\\u");
 162 
 163         final String hex = Integer.toHexString(ch);
 164         for (int i = hex.length(); i < 4; i++) {
 165             sb.append('0');
 166         }
 167         sb.append(hex);
 168 
 169         return sb.toString();
 170     }
 171 
    /**
     * Constructor. Lexes the whole source with scripting mode switched off.
     *
     * @param source    the source
     * @param stream    the token stream to lex
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false);
    }
 181 
    /**
     * Constructor. Lexes the whole source, from position 0 for
     * {@code source.getLength()} characters, without pausing on function
     * bodies.
     *
     * @param source    the source
     * @param stream    the token stream to lex
     * @param scripting are we in scripting mode
     */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting) {
        this(source, 0, source.getLength(), stream, scripting, false);
    }
 192 
 193     /**
 194      * Constructor
 195      *
 196      * @param source    the source
 197      * @param start     start position in source from which to start lexing
 198      * @param len       length of source segment to lex
 199      * @param stream    token stream to lex
 200      * @param scripting are we in scripting mode
 201      * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
 202      * function body. This is used with the feature where the parser is skipping nested function bodies to
 203      * avoid reading ahead unnecessarily when we skip the function bodies.
 204      */
 205 
 206     public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean pauseOnFunctionBody) {
 207         super(source.getContent(), 1, start, len);
 208         this.source      = source;
 209         this.stream      = stream;
 210         this.scripting   = scripting;
 211         this.nested      = false;
 212         this.pendingLine = 1;
 213         this.last        = EOL;
 214 
 215         this.pauseOnFunctionBody = pauseOnFunctionBody;
 216     }
 217 
 218     private Lexer(final Lexer lexer, final State state) {
 219         super(lexer, state);
 220 
 221         source = lexer.source;
 222         stream = lexer.stream;
 223         scripting = lexer.scripting;
 224         nested = true;
 225 
 226         pendingLine = state.pendingLine;
 227         linePosition = state.linePosition;
 228         last = EOL;
 229         pauseOnFunctionBody = false;
 230     }
 231 
 232     static class State extends Scanner.State {
 233         /** Pending new line number and position. */
 234         public final int pendingLine;
 235 
 236         /** Position of last EOL + 1. */
 237         public final int linePosition;
 238 
 239         /** Type of last token added. */
 240         public final TokenType last;
 241 
 242         /*
 243          * Constructor.
 244          */
 245 
 246         State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
 247             super(position, limit, line);
 248 
 249             this.pendingLine = pendingLine;
 250             this.linePosition = linePosition;
 251             this.last = last;
 252         }
 253     }
 254 
 255     /**
 256      * Save the state of the scan.
 257      *
 258      * @return Captured state.
 259      */
 260     @Override
 261     State saveState() {
 262         return new State(position, limit, line, pendingLine, linePosition, last);
 263     }
 264 
 265     /**
 266      * Restore the state of the scan.
 267      *
 268      * @param state
 269      *            Captured state.
 270      */
 271     void restoreState(final State state) {
 272         super.restoreState(state);
 273 
 274         pendingLine = state.pendingLine;
 275         linePosition = state.linePosition;
 276         last = state.last;
 277     }
 278 
 279     /**
 280      * Add a new token to the stream.
 281      *
 282      * @param type
 283      *            Token type.
 284      * @param start
 285      *            Start position.
 286      * @param end
 287      *            End position.
 288      */
 289     protected void add(final TokenType type, final int start, final int end) {
 290         // Record last token.
 291         last = type;
 292 
 293         // Only emit the last EOL in a cluster.
 294         if (type == EOL) {
 295             pendingLine = end;
 296             linePosition = start;
 297         } else {
 298             // Write any pending EOL to stream.
 299             if (pendingLine != -1) {
 300                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
 301                 pendingLine = -1;
 302             }
 303 
 304             // Write token to stream.
 305             stream.put(Token.toDesc(type, start, end - start));
 306         }
 307     }
 308 
 309     /**
 310      * Add a new token to the stream.
 311      *
 312      * @param type
 313      *            Token type.
 314      * @param start
 315      *            Start position.
 316      */
 317     protected void add(final TokenType type, final int start) {
 318         add(type, start, position);
 319     }
 320 
 321     /**
 322      * Return the String of valid whitespace characters for regular
 323      * expressions in JavaScript
 324      * @return regexp whitespace string
 325      */
 326     public static String getWhitespaceRegExp() {
 327         return JAVASCRIPT_WHITESPACE_IN_REGEXP;
 328     }
 329 
 330     /**
 331      * Skip end of line.
 332      *
 333      * @param addEOL true if EOL token should be recorded.
 334      */
 335     private void skipEOL(final boolean addEOL) {
 336 
 337         if (ch0 == '\r') { // detect \r\n pattern
 338             skip(1);
 339             if (ch0 == '\n') {
 340                 skip(1);
 341             }
 342         } else { // all other space, ch0 is guaranteed to be EOL or \0
 343             skip(1);
 344         }
 345 
 346         // bump up line count
 347         line++;
 348 
 349         if (addEOL) {
 350             // Add an EOL token.
 351             add(EOL, position, line);
 352         }
 353     }
 354 
 355     /**
 356      * Skip over rest of line including end of line.
 357      *
 358      * @param addEOL true if EOL token should be recorded.
 359      */
 360     private void skipLine(final boolean addEOL) {
 361         // Ignore characters.
 362         while (!isEOL(ch0) && !atEOF()) {
 363             skip(1);
 364         }
 365         // Skip over end of line.
 366         skipEOL(addEOL);
 367     }
 368 
 369     /**
 370      * Test whether a char is valid JavaScript whitespace
 371      * @param ch a char
 372      * @return true if valid JavaScript whitespace
 373      */
 374     public static boolean isJSWhitespace(final char ch) {
 375         return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
 376     }
 377 
 378     /**
 379      * Test whether a char is valid JavaScript end of line
 380      * @param ch a char
 381      * @return true if valid JavaScript end of line
 382      */
 383     public static boolean isJSEOL(final char ch) {
 384         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
 385     }
 386 
 387     /**
 388      * Test whether a char is valid JSON whitespace
 389      * @param ch a char
 390      * @return true if valid JSON whitespace
 391      */
 392     public static boolean isJsonWhitespace(final char ch) {
 393         return JSON_WHITESPACE.indexOf(ch) != -1;
 394     }
 395 
 396     /**
 397      * Test whether a char is valid JSON end of line
 398      * @param ch a char
 399      * @return true if valid JSON end of line
 400      */
 401     public static boolean isJsonEOL(final char ch) {
 402         return JSON_WHITESPACE_EOL.indexOf(ch) != -1;
 403     }
 404 
 405     /**
 406      * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
 407      * strings ('`') in scripting mode.
 408      * @param ch a char
 409      * @return true if string delimiter
 410      */
 411     protected boolean isStringDelimiter(final char ch) {
 412         return ch == '\'' || ch == '"' || (scripting && ch == '`');
 413     }
 414 
 415     /**
 416      * Test whether a char is valid JavaScript whitespace
 417      * @param ch a char
 418      * @return true if valid JavaScript whitespace
 419      */
 420     protected boolean isWhitespace(final char ch) {
 421         return Lexer.isJSWhitespace(ch);
 422     }
 423 
 424     /**
 425      * Test whether a char is valid JavaScript end of line
 426      * @param ch a char
 427      * @return true if valid JavaScript end of line
 428      */
 429     protected boolean isEOL(final char ch) {
 430         return Lexer.isJSEOL(ch);
 431     }
 432 
 433     /**
 434      * Skip over whitespace and detect end of line, adding EOL tokens if
 435      * encountered.
 436      *
 437      * @param addEOL true if EOL tokens should be recorded.
 438      */
 439     private void skipWhitespace(final boolean addEOL) {
 440         while (isWhitespace(ch0)) {
 441             if (isEOL(ch0)) {
 442                 skipEOL(addEOL);
 443             } else {
 444                 skip(1);
 445             }
 446         }
 447     }
 448 
 449     /**
 450      * Skip over comments.
 451      *
 452      * @return True if a comment.
 453      */
 454     protected boolean skipComments() {
 455         // Save the current position.
 456         final int start = position;
 457 
 458         if (ch0 == '/') {
 459             // Is it a // comment.
 460             if (ch1 == '/') {
 461                 // Skip over //.
 462                 skip(2);
 463 
 464                 boolean directiveComment = false;
 465                 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
 466                     directiveComment = true;
 467                 }
 468 
 469                 // Scan for EOL.
 470                 while (!atEOF() && !isEOL(ch0)) {
 471                     skip(1);
 472                 }
 473                 // Did detect a comment.
 474                 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
 475                 return true;
 476             } else if (ch1 == '*') {
 477                 // Skip over /*.
 478                 skip(2);
 479                 // Scan for */.
 480                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
 481                     // If end of line handle else skip character.
 482                     if (isEOL(ch0)) {
 483                         skipEOL(true);
 484                     } else {
 485                         skip(1);
 486                     }
 487                 }
 488 
 489                 if (atEOF()) {
 490                     // TODO - Report closing */ missing in parser.
 491                     add(ERROR, start);
 492                 } else {
 493                     // Skip */.
 494                     skip(2);
 495                 }
 496 
 497                 // Did detect a comment.
 498                 add(COMMENT, start);
 499                 return true;
 500             }
 501         } else if (ch0 == '#') {
 502             assert scripting;
 503             // shell style comment
 504             // Skip over #.
 505             skip(1);
 506             // Scan for EOL.
 507             while (!atEOF() && !isEOL(ch0)) {
 508                 skip(1);
 509             }
 510             // Did detect a comment.
 511             add(COMMENT, start);
 512             return true;
 513         }
 514 
 515         // Not a comment.
 516         return false;
 517     }
 518 
 519     /**
 520      * Convert a regex token to a token object.
 521      *
 522      * @param start  Position in source content.
 523      * @param length Length of regex token.
 524      * @return Regex token object.
 525      */
 526     public RegexToken valueOfPattern(final int start, final int length) {
 527         // Save the current position.
 528         final int savePosition = position;
 529         // Reset to beginning of content.
 530         reset(start);
 531         // Buffer for recording characters.
 532         final StringBuilder sb = new StringBuilder(length);
 533 
 534         // Skip /.
 535         skip(1);
 536         boolean inBrackets = false;
 537         // Scan for closing /, stopping at end of line.
 538         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
 539             // Skip over escaped character.
 540             if (ch0 == '\\') {
 541                 sb.append(ch0);
 542                 sb.append(ch1);
 543                 skip(2);
 544             } else {
 545                 if (ch0 == '[') {
 546                     inBrackets = true;
 547                 } else if (ch0 == ']') {
 548                     inBrackets = false;
 549                 }
 550 
 551                 // Skip literal character.
 552                 sb.append(ch0);
 553                 skip(1);
 554             }
 555         }
 556 
 557         // Get pattern as string.
 558         final String regex = sb.toString();
 559 
 560         // Skip /.
 561         skip(1);
 562 
 563         // Options as string.
 564         final String options = source.getString(position, scanIdentifier());
 565 
 566         reset(savePosition);
 567 
 568         // Compile the pattern.
 569         return new RegexToken(regex, options);
 570     }
 571 
 572     /**
 573      * Return true if the given token can be the beginning of a literal.
 574      *
 575      * @param token a token
 576      * @return true if token can start a literal.
 577      */
 578     public boolean canStartLiteral(final TokenType token) {
 579         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
 580     }
 581 
 582     /**
 583      * interface to receive line information for multi-line literals.
 584      */
 585     protected interface LineInfoReceiver {
 586         /**
 587          * Receives line information
 588          * @param line last line number
 589          * @param linePosition position of last line
 590          */
 591         public void lineInfo(int line, int linePosition);
 592     }
 593 
 594     /**
 595      * Check whether the given token represents the beginning of a literal. If so scan
 596      * the literal and return <tt>true</tt>, otherwise return false.
 597      *
 598      * @param token the token.
 599      * @param startTokenType the token type.
 600      * @param lir LineInfoReceiver that receives line info for multi-line string literals.
 601      * @return True if a literal beginning with startToken was found and scanned.
 602      */
 603     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
 604         // Check if it can be a literal.
 605         if (!canStartLiteral(startTokenType)) {
 606             return false;
 607         }
 608         // We break on ambiguous tokens so if we already moved on it can't be a literal.
 609         if (stream.get(stream.last()) != token) {
 610             return false;
 611         }
 612         // Rewind to token start position
 613         reset(Token.descPosition(token));
 614 
 615         if (ch0 == '/') {
 616             return scanRegEx();
 617         } else if (ch0 == '<') {
 618             if (ch1 == '<') {
 619                 return scanHereString(lir);
 620             } else if (Character.isJavaIdentifierStart(ch1)) {
 621                 return scanXMLLiteral();
 622             }
 623         }
 624 
 625         return false;
 626     }
 627 
 628     /**
 629      * Scan over regex literal.
 630      *
 631      * @return True if a regex literal.
 632      */
 633     private boolean scanRegEx() {
 634         assert ch0 == '/';
 635         // Make sure it's not a comment.
 636         if (ch1 != '/' && ch1 != '*') {
 637             // Record beginning of literal.
 638             final int start = position;
 639             // Skip /.
 640             skip(1);
 641             boolean inBrackets = false;
 642 
 643             // Scan for closing /, stopping at end of line.
 644             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
 645                 // Skip over escaped character.
 646                 if (ch0 == '\\') {
 647                     skip(1);
 648                     if (isEOL(ch0)) {
 649                         reset(start);
 650                         return false;
 651                     }
 652                     skip(1);
 653                 } else {
 654                     if (ch0 == '[') {
 655                         inBrackets = true;
 656                     } else if (ch0 == ']') {
 657                         inBrackets = false;
 658                     }
 659 
 660                     // Skip literal character.
 661                     skip(1);
 662                 }
 663             }
 664 
 665             // If regex literal.
 666             if (ch0 == '/') {
 667                 // Skip /.
 668                 skip(1);
 669 
 670                 // Skip over options.
 671                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
 672                     skip(1);
 673                 }
 674 
 675                 // Add regex token.
 676                 add(REGEX, start);
 677                 // Regex literal detected.
 678                 return true;
 679             }
 680 
 681             // False start try again.
 682             reset(start);
 683         }
 684 
 685         // Regex literal not detected.
 686         return false;
 687     }
 688 
 689     /**
 690      * Convert a digit to a integer.  Can't use Character.digit since we are
 691      * restricted to ASCII by the spec.
 692      *
 693      * @param ch   Character to convert.
 694      * @param base Numeric base.
 695      *
 696      * @return The converted digit or -1 if invalid.
 697      */
 698     protected static int convertDigit(final char ch, final int base) {
 699         int digit;
 700 
 701         if ('0' <= ch && ch <= '9') {
 702             digit = ch - '0';
 703         } else if ('A' <= ch && ch <= 'Z') {
 704             digit = ch - 'A' + 10;
 705         } else if ('a' <= ch && ch <= 'z') {
 706             digit = ch - 'a' + 10;
 707         } else {
 708             return -1;
 709         }
 710 
 711         return digit < base ? digit : -1;
 712     }
 713 
 714 
 715     /**
 716      * Get the value of a hexadecimal numeric sequence.
 717      *
 718      * @param length Number of digits.
 719      * @param type   Type of token to report against.
 720      * @return Value of sequence or < 0 if no digits.
 721      */
 722     private int hexSequence(final int length, final TokenType type) {
 723         int value = 0;
 724 
 725         for (int i = 0; i < length; i++) {
 726             final int digit = convertDigit(ch0, 16);
 727 
 728             if (digit == -1) {
 729                 error(Lexer.message("invalid.hex"), type, position, limit);
 730                 return i == 0 ? -1 : value;
 731             }
 732 
 733             value = digit | value << 4;
 734             skip(1);
 735         }
 736 
 737         return value;
 738     }
 739 
 740     /**
 741      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
 742      *
 743      * @return Value of sequence.
 744      */
 745     private int octalSequence() {
 746         int value = 0;
 747 
 748         for (int i = 0; i < 3; i++) {
 749             final int digit = convertDigit(ch0, 8);
 750 
 751             if (digit == -1) {
 752                 break;
 753             }
 754             value = digit | value << 3;
 755             skip(1);
 756 
 757             if (i == 1 && value >= 32) {
 758                 break;
 759             }
 760         }
 761         return value;
 762     }
 763 
 764     /**
 765      * Convert a string to a JavaScript identifier.
 766      *
 767      * @param start  Position in source content.
 768      * @param length Length of token.
 769      * @return Ident string or null if an error.
 770      */
 771     private String valueOfIdent(final int start, final int length) throws RuntimeException {
 772         // Save the current position.
 773         final int savePosition = position;
 774         // End of scan.
 775         final int end = start + length;
 776         // Reset to beginning of content.
 777         reset(start);
 778         // Buffer for recording characters.
 779         final StringBuilder sb = new StringBuilder(length);
 780 
 781         // Scan until end of line or end of file.
 782         while (!atEOF() && position < end && !isEOL(ch0)) {
 783             // If escape character.
 784             if (ch0 == '\\' && ch1 == 'u') {
 785                 skip(2);
 786                 final int ch = hexSequence(4, TokenType.IDENT);
 787                 if (isWhitespace((char)ch)) {
 788                     return null;
 789                 }
 790                 if (ch < 0) {
 791                     sb.append('\\');
 792                     sb.append('u');
 793                 } else {
 794                     sb.append((char)ch);
 795                 }
 796             } else {
 797                 // Add regular character.
 798                 sb.append(ch0);
 799                 skip(1);
 800             }
 801         }
 802 
 803         // Restore position.
 804         reset(savePosition);
 805 
 806         return sb.toString();
 807     }
 808 
 809     /**
 810      * Scan over and identifier or keyword. Handles identifiers containing
 811      * encoded Unicode chars.
 812      *
 813      * Example:
 814      *
 815      * var \u0042 = 44;
 816      */
 817     private void scanIdentifierOrKeyword() {
 818         // Record beginning of identifier.
 819         final int start = position;
 820         // Scan identifier.
 821         final int length = scanIdentifier();
 822         // Check to see if it is a keyword.
 823         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
 824         if (type == FUNCTION && pauseOnFunctionBody) {
 825             pauseOnNextLeftBrace = true;
 826         }
 827         // Add keyword or identifier token.
 828         add(type, start);
 829     }
 830 
 831     /**
 832      * Convert a string to a JavaScript string object.
 833      *
 834      * @param start  Position in source content.
 835      * @param length Length of token.
 836      * @return JavaScript string object.
 837      */
 838     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
 839         // Save the current position.
 840         final int savePosition = position;
 841         // Calculate the end position.
 842         final int end = start + length;
 843         // Reset to beginning of string.
 844         reset(start);
 845 
 846         // Buffer for recording characters.
 847         final StringBuilder sb = new StringBuilder(length);
 848 
 849         // Scan until end of string.
 850         while (position < end) {
 851             // If escape character.
 852             if (ch0 == '\\') {
 853                 skip(1);
 854 
 855                 final char next = ch0;
 856                 final int afterSlash = position;
 857 
 858                 skip(1);
 859 
 860                 // Special characters.
 861                 switch (next) {
 862                 case '0':
 863                 case '1':
 864                 case '2':
 865                 case '3':
 866                 case '4':
 867                 case '5':
 868                 case '6':
 869                 case '7': {
 870                     if (strict) {
 871                         // "\0" itself is allowed in strict mode. Only other 'real'
 872                         // octal escape sequences are not allowed (eg. "\02", "\31").
 873                         // See section 7.8.4 String literals production EscapeSequence
 874                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
 875                             error(Lexer.message("strict.no.octal"), STRING, position, limit);
 876                         }
 877                     }
 878                     reset(afterSlash);
 879                     // Octal sequence.
 880                     final int ch = octalSequence();
 881 
 882                     if (ch < 0) {
 883                         sb.append('\\');
 884                         sb.append('x');
 885                     } else {
 886                         sb.append((char)ch);
 887                     }
 888                     break;
 889                 }
 890                 case 'n':
 891                     sb.append('\n');
 892                     break;
 893                 case 't':
 894                     sb.append('\t');
 895                     break;
 896                 case 'b':
 897                     sb.append('\b');
 898                     break;
 899                 case 'f':
 900                     sb.append('\f');
 901                     break;
 902                 case 'r':
 903                     sb.append('\r');
 904                     break;
 905                 case '\'':
 906                     sb.append('\'');
 907                     break;
 908                 case '\"':
 909                     sb.append('\"');
 910                     break;
 911                 case '\\':
 912                     sb.append('\\');
 913                     break;
 914                 case '\r': // CR | CRLF
 915                     if (ch0 == '\n') {
 916                         skip(1);
 917                     }
 918                     // fall through
 919                 case '\n': // LF
 920                 case '\u2028': // LS
 921                 case '\u2029': // PS
 922                     // continue on the next line, slash-return continues string
 923                     // literal
 924                     break;
 925                 case 'x': {
 926                     // Hex sequence.
 927                     final int ch = hexSequence(2, STRING);
 928 
 929                     if (ch < 0) {
 930                         sb.append('\\');
 931                         sb.append('x');
 932                     } else {
 933                         sb.append((char)ch);
 934                     }
 935                 }
 936                     break;
 937                 case 'u': {
 938                     // Unicode sequence.
 939                     final int ch = hexSequence(4, STRING);
 940 
 941                     if (ch < 0) {
 942                         sb.append('\\');
 943                         sb.append('u');
 944                     } else {
 945                         sb.append((char)ch);
 946                     }
 947                 }
 948                     break;
 949                 case 'v':
 950                     sb.append('\u000B');
 951                     break;
 952                 // All other characters.
 953                 default:
 954                     sb.append(next);
 955                     break;
 956                 }
 957             } else {
 958                 // Add regular character.
 959                 sb.append(ch0);
 960                 skip(1);
 961             }
 962         }
 963 
 964         // Restore position.
 965         reset(savePosition);
 966 
 967         return sb.toString();
 968     }
 969 
 970     /**
 971      * Scan over a string literal.
 972      * @param add true if we nare not just scanning but should actually modify the token stream
 973      */
 974     protected void scanString(final boolean add) {
 975         // Type of string.
 976         TokenType type = STRING;
 977         // Record starting quote.
 978         final char quote = ch0;
 979         // Skip over quote.
 980         skip(1);
 981 
 982         // Record beginning of string content.
 983         final State stringState = saveState();
 984 
 985         // Scan until close quote or end of line.
 986         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
 987             // Skip over escaped character.
 988             if (ch0 == '\\') {
 989                 type = ESCSTRING;
 990                 skip(1);
 991                 if (! isEscapeCharacter(ch0)) {
 992                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);
 993                 }
 994                 if (isEOL(ch0)) {
 995                     // Multiline string literal
 996                     skipEOL(false);
 997                     continue;
 998                 }
 999             }
1000             // Skip literal character.
1001             skip(1);
1002         }
1003 
1004         // If close quote.
1005         if (ch0 == quote) {
1006             // Skip close quote.
1007             skip(1);
1008         } else {
1009             error(Lexer.message("missing.close.quote"), STRING, position, limit);
1010         }
1011 
1012         // If not just scanning.
1013         if (add) {
1014             // Record end of string.
1015             stringState.setLimit(position - 1);
1016 
1017             if (scripting && !stringState.isEmpty()) {
1018                 switch (quote) {
1019                 case '`':
1020                     // Mark the beginning of an exec string.
1021                     add(EXECSTRING, stringState.position, stringState.limit);
1022                     // Frame edit string with left brace.
1023                     add(LBRACE, stringState.position, stringState.position);
1024                     // Process edit string.
1025                     editString(type, stringState);
1026                     // Frame edit string with right brace.
1027                     add(RBRACE, stringState.limit, stringState.limit);
1028                     break;
1029                 case '"':
1030                     // Only edit double quoted strings.
1031                     editString(type, stringState);
1032                     break;
1033                 case '\'':
1034                     // Add string token without editing.
1035                     add(type, stringState.position, stringState.limit);
1036                     break;
1037                 default:
1038                     break;
1039                 }
1040             } else {
1041                 /// Add string token without editing.
1042                 add(type, stringState.position, stringState.limit);
1043             }
1044         }
1045     }
1046 
    /**
     * Is the given character a valid escape char after "\" ?
     *
     * <p>This implementation accepts every character. {@code scanString} calls it
     * for the character following a backslash and reports an
     * "invalid.escape.char" error when it returns {@code false}. The method is
     * {@code protected}, so presumably subclasses narrow the accepted set.
     *
     * @param ch character to be checked
     * @return if the given character is valid after "\"
     */
    protected boolean isEscapeCharacter(final char ch) {
        return true;
    }
1056 
1057     /**
1058      * Convert string to number.
1059      *
1060      * @param valueString  String to convert.
1061      * @param radix        Numeric base.
1062      * @return Converted number.
1063      */
1064     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1065         try {
1066             final long value = Long.parseLong(valueString, radix);
1067             if(value >= MIN_INT_L && value <= MAX_INT_L) {
1068                 return Integer.valueOf((int)value);
1069             }
1070             return Long.valueOf(value);
1071         } catch (final NumberFormatException e) {
1072             if (radix == 10) {
1073                 return Double.valueOf(valueString);
1074             }
1075 
1076             double value = 0.0;
1077 
1078             for (int i = 0; i < valueString.length(); i++) {
1079                 final char ch = valueString.charAt(i);
1080                 // Preverified, should always be a valid digit.
1081                 final int digit = convertDigit(ch, radix);
1082                 value *= radix;
1083                 value += digit;
1084             }
1085 
1086             return value;
1087         }
1088     }
1089 
1090     /**
1091      * Scan a number.
1092      */
1093     protected void scanNumber() {
1094         // Record beginning of number.
1095         final int start = position;
1096         // Assume value is a decimal.
1097         TokenType type = DECIMAL;
1098 
1099         // First digit of number.
1100         int digit = convertDigit(ch0, 10);
1101 
1102         // If number begins with 0x.
1103         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1104             // Skip over 0xN.
1105             skip(3);
1106             // Skip over remaining digits.
1107             while (convertDigit(ch0, 16) != -1) {
1108                 skip(1);
1109             }
1110 
1111             type = HEXADECIMAL;
1112         } else {
1113             // Check for possible octal constant.
1114             boolean octal = digit == 0;
1115             // Skip first digit if not leading '.'.
1116             if (digit != -1) {
1117                 skip(1);
1118             }
1119 
1120             // Skip remaining digits.
1121             while ((digit = convertDigit(ch0, 10)) != -1) {
1122                 // Check octal only digits.
1123                 octal = octal && digit < 8;
1124                 // Skip digit.
1125                 skip(1);
1126             }
1127 
1128             if (octal && position - start > 1) {
1129                 type = OCTAL;
1130             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1131                 // Must be a double.
1132                 if (ch0 == '.') {
1133                     // Skip period.
1134                     skip(1);
1135                     // Skip mantissa.
1136                     while (convertDigit(ch0, 10) != -1) {
1137                         skip(1);
1138                     }
1139                 }
1140 
1141                 // Detect exponent.
1142                 if (ch0 == 'E' || ch0 == 'e') {
1143                     // Skip E.
1144                     skip(1);
1145                     // Detect and skip exponent sign.
1146                     if (ch0 == '+' || ch0 == '-') {
1147                         skip(1);
1148                     }
1149                     // Skip exponent.
1150                     while (convertDigit(ch0, 10) != -1) {
1151                         skip(1);
1152                     }
1153                 }
1154 
1155                 type = FLOATING;
1156             }
1157         }
1158 
1159         if (Character.isJavaIdentifierStart(ch0)) {
1160             error(Lexer.message("missing.space.after.number"), type, position, 1);
1161         }
1162 
1163         // Add number token.
1164         add(type, start);
1165     }
1166 
1167     /**
1168      * Convert a regex token to a token object.
1169      *
1170      * @param start  Position in source content.
1171      * @param length Length of regex token.
1172      * @return Regex token object.
1173      */
1174     XMLToken valueOfXML(final int start, final int length) {
1175         return new XMLToken(source.getString(start, length));
1176     }
1177 
1178     /**
1179      * Scan over a XML token.
1180      *
1181      * @return TRUE if is an XML literal.
1182      */
1183     private boolean scanXMLLiteral() {
1184         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1185         if (XML_LITERALS) {
1186             // Record beginning of xml expression.
1187             final int start = position;
1188 
1189             int openCount = 0;
1190 
1191             do {
1192                 if (ch0 == '<') {
1193                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1194                         skip(3);
1195                         openCount--;
1196                     } else if (Character.isJavaIdentifierStart(ch1)) {
1197                         skip(2);
1198                         openCount++;
1199                     } else if (ch1 == '?') {
1200                         skip(2);
1201                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1202                         skip(4);
1203                     } else {
1204                         reset(start);
1205                         return false;
1206                     }
1207 
1208                     while (!atEOF() && ch0 != '>') {
1209                         if (ch0 == '/' && ch1 == '>') {
1210                             openCount--;
1211                             skip(1);
1212                             break;
1213                         } else if (ch0 == '\"' || ch0 == '\'') {
1214                             scanString(false);
1215                         } else {
1216                             skip(1);
1217                         }
1218                     }
1219 
1220                     if (ch0 != '>') {
1221                         reset(start);
1222                         return false;
1223                     }
1224 
1225                     skip(1);
1226                 } else if (atEOF()) {
1227                     reset(start);
1228                     return false;
1229                 } else {
1230                     skip(1);
1231                 }
1232             } while (openCount > 0);
1233 
1234             add(XML, start);
1235             return true;
1236         }
1237 
1238         return false;
1239     }
1240 
1241     /**
1242      * Scan over identifier characters.
1243      *
1244      * @return Length of identifier or zero if none found.
1245      */
1246     private int scanIdentifier() {
1247         final int start = position;
1248 
1249         // Make sure first character is valid start character.
1250         if (ch0 == '\\' && ch1 == 'u') {
1251             skip(2);
1252             final int ch = hexSequence(4, TokenType.IDENT);
1253 
1254             if (!Character.isJavaIdentifierStart(ch)) {
1255                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1256             }
1257         } else if (!Character.isJavaIdentifierStart(ch0)) {
1258             // Not an identifier.
1259             return 0;
1260         }
1261 
1262         // Make sure remaining characters are valid part characters.
1263         while (!atEOF()) {
1264             if (ch0 == '\\' && ch1 == 'u') {
1265                 skip(2);
1266                 final int ch = hexSequence(4, TokenType.IDENT);
1267 
1268                 if (!Character.isJavaIdentifierPart(ch)) {
1269                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1270                 }
1271             } else if (Character.isJavaIdentifierPart(ch0)) {
1272                 skip(1);
1273             } else {
1274                 break;
1275             }
1276         }
1277 
1278         // Length of identifier sequence.
1279         return position - start;
1280     }
1281 
1282     /**
1283      * Compare two identifiers (in content) for equality.
1284      *
1285      * @param aStart  Start of first identifier.
1286      * @param aLength Length of first identifier.
1287      * @param bStart  Start of second identifier.
1288      * @param bLength Length of second identifier.
1289      * @return True if equal.
1290      */
1291     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1292         if (aLength == bLength) {
1293             for (int i = 0; i < aLength; i++) {
1294                 if (content[aStart + i] != content[bStart + i]) {
1295                     return false;
1296                 }
1297             }
1298 
1299             return true;
1300         }
1301 
1302         return false;
1303     }
1304 
1305     /**
1306      * Detect if a line starts with a marker identifier.
1307      *
1308      * @param identStart  Start of identifier.
1309      * @param identLength Length of identifier.
1310      * @return True if detected.
1311      */
1312     private boolean hasHereMarker(final int identStart, final int identLength) {
1313         // Skip any whitespace.
1314         skipWhitespace(false);
1315 
1316         return identifierEqual(identStart, identLength, position, scanIdentifier());
1317     }
1318 
1319     /**
1320      * Lexer to service edit strings.
1321      */
1322     private static class EditStringLexer extends Lexer {
1323         /** Type of string literals to emit. */
1324         final TokenType stringType;
1325 
1326         /*
1327          * Constructor.
1328          */
1329 
1330         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1331             super(lexer, stringState);
1332 
1333             this.stringType = stringType;
1334         }
1335 
1336         /**
1337          * Lexify the contents of the string.
1338          */
1339         @Override
1340         public void lexify() {
1341             // Record start of string position.
1342             int stringStart = position;
1343             // Indicate that the priming first string has not been emitted.
1344             boolean primed = false;
1345 
1346             while (true) {
1347                 // Detect end of content.
1348                 if (atEOF()) {
1349                     break;
1350                 }
1351 
1352                 // Honour escapes (should be well formed.)
1353                 if (ch0 == '\\' && stringType == ESCSTRING) {
1354                     skip(2);
1355 
1356                     continue;
1357                 }
1358 
1359                 // If start of expression.
1360                 if (ch0 == '$' && ch1 == '{') {
1361                     if (!primed || stringStart != position) {
1362                         if (primed) {
1363                             add(ADD, stringStart, stringStart + 1);
1364                         }
1365 
1366                         add(stringType, stringStart, position);
1367                         primed = true;
1368                     }
1369 
1370                     // Skip ${
1371                     skip(2);
1372 
1373                     // Save expression state.
1374                     final State expressionState = saveState();
1375 
1376                     // Start with one open brace.
1377                     int braceCount = 1;
1378 
1379                     // Scan for the rest of the string.
1380                     while (!atEOF()) {
1381                         // If closing brace.
1382                         if (ch0 == '}') {
1383                             // Break only only if matching brace.
1384                             if (--braceCount == 0) {
1385                                 break;
1386                             }
1387                         } else if (ch0 == '{') {
1388                             // Bump up the brace count.
1389                             braceCount++;
1390                         }
1391 
1392                         // Skip to next character.
1393                         skip(1);
1394                     }
1395 
1396                     // If braces don't match then report an error.
1397                     if (braceCount != 0) {
1398                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1399                     }
1400 
1401                     // Mark end of expression.
1402                     expressionState.setLimit(position);
1403                     // Skip closing brace.
1404                     skip(1);
1405 
1406                     // Start next string.
1407                     stringStart = position;
1408 
1409                     // Concatenate expression.
1410                     add(ADD, expressionState.position, expressionState.position + 1);
1411                     add(LPAREN, expressionState.position, expressionState.position + 1);
1412 
1413                     // Scan expression.
1414                     final Lexer lexer = new Lexer(this, expressionState);
1415                     lexer.lexify();
1416 
1417                     // Close out expression parenthesis.
1418                     add(RPAREN, position - 1, position);
1419 
1420                     continue;
1421                 }
1422 
1423                 // Next character in string.
1424                 skip(1);
1425             }
1426 
1427             // If there is any unemitted string portion.
1428             if (stringStart != limit) {
1429                 // Concatenate remaining string.
1430                 if (primed) {
1431                     add(ADD, stringStart, 1);
1432                 }
1433 
1434                 add(stringType, stringStart, limit);
1435             }
1436         }
1437 
1438     }
1439 
1440     /**
1441      * Edit string for nested expressions.
1442      *
1443      * @param stringType  Type of string literals to emit.
1444      * @param stringState State of lexer at start of string.
1445      */
1446     private void editString(final TokenType stringType, final State stringState) {
1447         // Use special lexer to scan string.
1448         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1449         lexer.lexify();
1450 
1451         // Need to keep lexer informed.
1452         last = stringType;
1453     }
1454 
1455     /**
1456      * Scan over a here string.
1457      *
1458      * @return TRUE if is a here string.
1459      */
1460     private boolean scanHereString(final LineInfoReceiver lir) {
1461         assert ch0 == '<' && ch1 == '<';
1462         if (scripting) {
1463             // Record beginning of here string.
1464             final State saved = saveState();
1465 
1466             // << or <<<
1467             final boolean excludeLastEOL = ch2 != '<';
1468 
1469             if (excludeLastEOL) {
1470                 skip(2);
1471             } else {
1472                 skip(3);
1473             }
1474 
1475             // Scan identifier.
1476             final int identStart = position;
1477             final int identLength = scanIdentifier();
1478 
1479             // Check for identifier.
1480             if (identLength == 0) {
1481                 // Treat as shift.
1482                 restoreState(saved);
1483 
1484                 return false;
1485             }
1486 
1487             // Record rest of line.
1488             final State restState = saveState();
1489             // keep line number updated
1490             int lastLine = line;
1491 
1492             skipLine(false);
1493             lastLine++;
1494             int lastLinePosition = position;
1495             restState.setLimit(position);
1496 
1497             // Record beginning of string.
1498             final State stringState = saveState();
1499             int stringEnd = position;
1500 
1501             // Hunt down marker.
1502             while (!atEOF()) {
1503                 // Skip any whitespace.
1504                 skipWhitespace(false);
1505 
1506                 if (hasHereMarker(identStart, identLength)) {
1507                     break;
1508                 }
1509 
1510                 skipLine(false);
1511                 lastLine++;
1512                 lastLinePosition = position;
1513                 stringEnd = position;
1514             }
1515 
1516             // notify last line information
1517             lir.lineInfo(lastLine, lastLinePosition);
1518 
1519             // Record end of string.
1520             stringState.setLimit(stringEnd);
1521 
1522             // If marker is missing.
1523             if (stringState.isEmpty() || atEOF()) {
1524                 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1525                 restoreState(saved);
1526 
1527                 return false;
1528             }
1529 
1530             // Remove last end of line if specified.
1531             if (excludeLastEOL) {
1532                 // Handles \n.
1533                 if (content[stringEnd - 1] == '\n') {
1534                     stringEnd--;
1535                 }
1536 
1537                 // Handles \r and \r\n.
1538                 if (content[stringEnd - 1] == '\r') {
1539                     stringEnd--;
1540                 }
1541 
1542                 // Update end of string.
1543                 stringState.setLimit(stringEnd);
1544             }
1545 
1546             // Edit string if appropriate.
1547             if (scripting && !stringState.isEmpty()) {
1548                 editString(STRING, stringState);
1549             } else {
1550                 // Add here string.
1551                 add(STRING, stringState.position, stringState.limit);
1552             }
1553 
1554             // Scan rest of original line.
1555             final Lexer restLexer = new Lexer(this, restState);
1556 
1557             restLexer.lexify();
1558 
1559             return true;
1560         }
1561 
1562         return false;
1563     }
1564 
1565     /**
1566      * Breaks source content down into lex units, adding tokens to the token
1567      * stream. The routine scans until the stream buffer is full. Can be called
1568      * repeatedly until EOF is detected.
1569      */
1570     public void lexify() {
1571         while (!stream.isFull() || nested) {
1572             // Skip over whitespace.
1573             skipWhitespace(true);
1574 
1575             // Detect end of file.
1576             if (atEOF()) {
1577                 if (!nested) {
1578                     // Add an EOF token at the end.
1579                     add(EOF, position);
1580                 }
1581 
1582                 break;
1583             }
1584 
1585             // Check for comments. Note that we don't scan for regexp and other literals here as
1586             // we may not have enough context to distinguish them from similar looking operators.
1587             // Instead we break on ambiguous operators below and let the parser decide.
1588             if (ch0 == '/' && skipComments()) {
1589                 continue;
1590             }
1591 
1592             if (scripting && ch0 == '#' && skipComments()) {
1593                 continue;
1594             }
1595 
1596             // TokenType for lookup of delimiter or operator.
1597             TokenType type;
1598 
1599             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1600                 // '.' followed by digit.
1601                 // Scan and add a number.
1602                 scanNumber();
1603             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1604                 // Get the number of characters in the token.
1605                 final int typeLength = type.getLength();
1606                 // Skip that many characters.
1607                 skip(typeLength);
1608                 // Add operator token.
1609                 add(type, position - typeLength);
1610                 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1611                 // We break to let the parser decide what it is.
1612                 if (canStartLiteral(type)) {
1613                     break;
1614                 } else if (type == LBRACE && pauseOnNextLeftBrace) {
1615                     pauseOnNextLeftBrace = false;
1616                     break;
1617                 }
1618             } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1619                 // Scan and add identifier or keyword.
1620                 scanIdentifierOrKeyword();
1621             } else if (isStringDelimiter(ch0)) {
1622                 // Scan and add a string.
1623                 scanString(true);
1624             } else if (Character.isDigit(ch0)) {
1625                 // Scan and add a number.
1626                 scanNumber();
1627             } else {
1628                 // Don't recognize this character.
1629                 skip(1);
1630                 add(ERROR, position - 1);
1631             }
1632         }
1633     }
1634 
1635     /**
1636      * Return value of token given its token descriptor.
1637      *
1638      * @param token  Token descriptor.
1639      * @return JavaScript value.
1640      */
1641     Object getValueOf(final long token, final boolean strict) {
1642         final int start = Token.descPosition(token);
1643         final int len   = Token.descLength(token);
1644 
1645         switch (Token.descType(token)) {
1646         case DECIMAL:
1647             return Lexer.valueOf(source.getString(start, len), 10); // number
1648         case OCTAL:
1649             return Lexer.valueOf(source.getString(start, len), 8); // number
1650         case HEXADECIMAL:
1651             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1652         case FLOATING:
1653             final String str   = source.getString(start, len);
1654             final double value = Double.valueOf(str);
1655             if (str.indexOf('.') != -1) {
1656                 return value; //number
1657             }
1658             //anything without an explicit decimal point is still subject to a
1659             //"representable as int or long" check. Then the programmer does not
1660             //explicitly code something as a double. For example new Color(int, int, int)
1661             //and new Color(float, float, float) will get ambiguous for cases like
1662             //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1663             //yet we don't want e.g. 1e6 to be a double unnecessarily
1664             if (JSType.isRepresentableAsInt(value) && !JSType.isNegativeZero(value)) {
1665                 return (int)value;
1666             } else if (JSType.isRepresentableAsLong(value) && !JSType.isNegativeZero(value)) {
1667                 return (long)value;
1668             }
1669             return value;
1670         case STRING:
1671             return source.getString(start, len); // String
1672         case ESCSTRING:
1673             return valueOfString(start, len, strict); // String
1674         case IDENT:
1675             return valueOfIdent(start, len); // String
1676         case REGEX:
1677             return valueOfPattern(start, len); // RegexToken::LexerToken
1678         case XML:
1679             return valueOfXML(start, len); // XMLToken::LexerToken
1680         case DIRECTIVE_COMMENT:
1681             return source.getString(start, len);
1682         default:
1683             break;
1684         }
1685 
1686         return null;
1687     }
1688 
1689     /**
1690      * Get the correctly localized error message for a given message id format arguments
1691      * @param msgId message id
1692      * @param args  format arguments
1693      * @return message
1694      */
1695     protected static String message(final String msgId, final String... args) {
1696         return ECMAErrors.getMessage("lexer.error." + msgId, args);
1697     }
1698 
1699     /**
1700      * Generate a runtime exception
1701      *
1702      * @param message       error message
1703      * @param type          token type
1704      * @param start         start position of lexed error
1705      * @param length        length of lexed error
1706      * @throws ParserException  unconditionally
1707      */
1708     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1709         final long token     = Token.toDesc(type, start, length);
1710         final int  pos       = Token.descPosition(token);
1711         final int  lineNum   = source.getLine(pos);
1712         final int  columnNum = source.getColumn(pos);
1713         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1714         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1715     }
1716 
1717     /**
1718      * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1719      * This is the abstract superclass
1720      */
1721     public static abstract class LexerToken implements Serializable {
1722         private static final long serialVersionUID = 1L;
1723 
1724         private final String expression;
1725 
1726         /**
1727          * Constructor
1728          * @param expression token expression
1729          */
1730         protected LexerToken(final String expression) {
1731             this.expression = expression;
1732         }
1733 
1734         /**
1735          * Get the expression
1736          * @return expression
1737          */
1738         public String getExpression() {
1739             return expression;
1740         }
1741     }
1742 
1743     /**
1744      * Temporary container for regular expressions.
1745      */
1746     public static class RegexToken extends LexerToken {
1747         private static final long serialVersionUID = 1L;
1748 
1749         /** Options. */
1750         private final String options;
1751 
1752         /**
1753          * Constructor.
1754          *
1755          * @param expression  regexp expression
1756          * @param options     regexp options
1757          */
1758         public RegexToken(final String expression, final String options) {
1759             super(expression);
1760             this.options = options;
1761         }
1762 
1763         /**
1764          * Get regexp options
1765          * @return options
1766          */
1767         public String getOptions() {
1768             return options;
1769         }
1770 
1771         @Override
1772         public String toString() {
1773             return '/' + getExpression() + '/' + options;
1774         }
1775     }
1776 
    /**
     * Temporary container for XML expression.
     * Adds nothing to {@code LexerToken} beyond a public constructor.
     */
    public static class XMLToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /**
         * Constructor.
         *
         * @param expression  XML expression
         */
        public XMLToken(final String expression) {
            super(expression);
        }
    }
1792 }