1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.ADD;
  29 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
  30 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
  31 import static jdk.nashorn.internal.parser.TokenType.EOF;
  32 import static jdk.nashorn.internal.parser.TokenType.EOL;
  33 import static jdk.nashorn.internal.parser.TokenType.ERROR;
  34 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  35 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
  36 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
  37 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
  38 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
  39 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
  40 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
  41 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  42 import static jdk.nashorn.internal.parser.TokenType.REGEX;
  43 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
  44 import static jdk.nashorn.internal.parser.TokenType.STRING;
  45 import static jdk.nashorn.internal.parser.TokenType.XML;
  46 
  47 import jdk.nashorn.internal.runtime.ECMAErrors;
  48 import jdk.nashorn.internal.runtime.ErrorManager;
  49 import jdk.nashorn.internal.runtime.JSErrorType;
  50 import jdk.nashorn.internal.runtime.ParserException;
  51 import jdk.nashorn.internal.runtime.Source;
  52 import jdk.nashorn.internal.runtime.options.Options;
  53 
  54 /**
  55  * Responsible for converting source content into a stream of tokens.
  56  *
  57  */
  58 @SuppressWarnings("fallthrough")
  59 public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} held as a {@code long}. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} held as a {@code long}. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /**
     * Value of the boolean option {@code nashorn.lexer.xmlliterals}. When true,
     * {@link #canStartLiteral(TokenType)} accepts tokens starting with
     * {@code '<'} even outside scripting mode.
     */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");
  64 
    /** Content source; also used to extract substrings such as regex options. */
    private final Source source;

    /** Buffered stream that receives every token descriptor produced by {@code add}. */
    private final TokenStream stream;

    /**
     * True if here and edit strings are supported (scripting mode). In this
     * file it also makes the back-quote a string delimiter and lets a token
     * starting with {@code '<'} begin a literal.
     */
    private final boolean scripting;

    /** True if a nested scan. (scan to completion, no EOF.) Set only by the private constructor. */
    private final boolean nested;

    /**
     * Line number carried by the EOL token that has been seen but not yet
     * written to the stream, or -1 when no EOL is pending.
     */
    private int pendingLine;

    /** Position of last EOL + 1; used as the start of the pending EOL token. */
    private int linePosition;

    /** Type of last token added (including EOL tokens that are still pending). */
    private TokenType last;
  85 
    /** Horizontal whitespace shared by the JSON and JavaScript whitespace sets. */
    private static final String SPACETAB = " \t";  // ASCII space and tab
    /** Line terminators shared by the JSON and JavaScript end of line sets. */
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    /** Characters accepted by {@link #isJsonEOL(char)}. */
    private static final String JSON_WHITESPACE_EOL = LFCR;
    /** Characters accepted by {@link #isJsonWhitespace(char)}. */
    private static final String JSON_WHITESPACE     = SPACETAB + LFCR;

    /** Characters accepted by {@link #isJSEOL(char)}. */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /**
     * Characters accepted by {@link #isJSWhitespace(char)}. The set includes
     * every end of line character from {@code JAVASCRIPT_WHITESPACE_EOL}.
     */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // line tabulation (vertical tab)
        "\u000c" + // form feed (ctrl-l)
        "\u00a0" + // no-break space (Latin-1)
        "\u1680" + // Ogham space mark
        "\u180e" + // Mongolian vowel separator
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;

    /**
     * The same characters as {@code JAVASCRIPT_WHITESPACE}, but each one is
     * spelled out as escape text: the literals below contain a real backslash
     * followed by the letter u and four hexadecimal digits, not the character
     * itself. Returned unchanged by {@link #getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // form feed (ctrl-l)
        "\\u00a0" + // no-break space (Latin-1)
        "\\u1680" + // Ogham space mark
        "\\u180e" + // Mongolian vowel separator
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
 150 
 151     static String unicodeEscape(final char ch) {
 152         final StringBuilder sb = new StringBuilder();
 153 
 154         sb.append("\\u");
 155 
 156         final String hex = Integer.toHexString(ch);
 157         for (int i = hex.length(); i < 4; i++) {
 158             sb.append('0');
 159         }
 160         sb.append(hex);
 161 
 162         return sb.toString();
 163     }
 164 
    /**
     * Constructor. Creates a lexer with scripting mode switched off by
     * delegating to {@link #Lexer(Source, TokenStream, boolean)}.
     *
     * @param source    the source
     * @param stream    the token stream to lex
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false);
    }
 174 
 175     /**
 176      * Constructor
 177      *
 178      * @param source    the source
 179      * @param stream    the token stream to lex
 180      * @param scripting are we in scripting mode
 181      */
 182     public Lexer(final Source source, final TokenStream stream, final boolean scripting) {
 183         super(source.getContent(), 1, 0, source.getLength());
 184 
 185         this.source      = source;
 186         this.stream      = stream;
 187         this.scripting   = scripting;
 188         this.nested      = false;
 189         this.pendingLine = 1;
 190         this.last        = EOL;
 191     }
 192 
 193     private Lexer(final Lexer lexer, final State state) {
 194         super(lexer, state);
 195 
 196         source = lexer.source;
 197         stream = lexer.stream;
 198         scripting = lexer.scripting;
 199         nested = true;
 200 
 201         pendingLine = state.pendingLine;
 202         linePosition = state.linePosition;
 203         last = EOL;
 204     }
 205 
 206     static class State extends Scanner.State {
 207         /** Pending new line number and position. */
 208         public final int pendingLine;
 209 
 210         /** Position of last EOL + 1. */
 211         public final int linePosition;
 212 
 213         /** Type of last token added. */
 214         public final TokenType last;
 215 
 216         /*
 217          * Constructor.
 218          */
 219 
 220         State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
 221             super(position, limit, line);
 222 
 223             this.pendingLine = pendingLine;
 224             this.linePosition = linePosition;
 225             this.last = last;
 226         }
 227     }
 228 
 229     /**
 230      * Save the state of the scan.
 231      *
 232      * @return Captured state.
 233      */
 234     @Override
 235     State saveState() {
 236         return new State(position, limit, line, pendingLine, linePosition, last);
 237     }
 238 
 239     /**
 240      * Restore the state of the scan.
 241      *
 242      * @param state
 243      *            Captured state.
 244      */
 245     void restoreState(final State state) {
 246         super.restoreState(state);
 247 
 248         pendingLine = state.pendingLine;
 249         linePosition = state.linePosition;
 250         last = state.last;
 251     }
 252 
 253     /**
 254      * Add a new token to the stream.
 255      *
 256      * @param type
 257      *            Token type.
 258      * @param start
 259      *            Start position.
 260      * @param end
 261      *            End position.
 262      */
 263     protected void add(final TokenType type, final int start, final int end) {
 264         // Record last token.
 265         last = type;
 266 
 267         // Only emit the last EOL in a cluster.
 268         if (type == EOL) {
 269             pendingLine = end;
 270             linePosition = start;
 271         } else {
 272             // Write any pending EOL to stream.
 273             if (pendingLine != -1) {
 274                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
 275                 pendingLine = -1;
 276             }
 277 
 278             // Write token to stream.
 279             stream.put(Token.toDesc(type, start, end - start));
 280         }
 281     }
 282 
 283     /**
 284      * Add a new token to the stream.
 285      *
 286      * @param type
 287      *            Token type.
 288      * @param start
 289      *            Start position.
 290      */
 291     protected void add(final TokenType type, final int start) {
 292         add(type, start, position);
 293     }
 294 
    /**
     * Return the String of valid whitespace characters for regular
     * expressions in JavaScript. Each character is given as escape text
     * (a backslash, the letter u and four hexadecimal digits) rather than as
     * the character itself.
     * @return regexp whitespace string
     */
    public static String getWhitespaceRegExp() {
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
 303 
 304     /**
 305      * Skip end of line.
 306      *
 307      * @param addEOL true if EOL token should be recorded.
 308      */
 309     private void skipEOL(final boolean addEOL) {
 310 
 311         if (ch0 == '\r') { // detect \r\n pattern
 312             skip(1);
 313             if (ch0 == '\n') {
 314                 skip(1);
 315             }
 316         } else { // all other space, ch0 is guaranteed to be EOL or \0
 317             skip(1);
 318         }
 319 
 320         // bump up line count
 321         line++;
 322 
 323         if (addEOL) {
 324             // Add an EOL token.
 325             add(EOL, position, line);
 326         }
 327     }
 328 
 329     /**
 330      * Skip over rest of line including end of line.
 331      *
 332      * @param addEOL true if EOL token should be recorded.
 333      */
 334     private void skipLine(final boolean addEOL) {
 335         // Ignore characters.
 336         while (!isEOL(ch0) && !atEOF()) {
 337             skip(1);
 338         }
 339         // Skip over end of line.
 340         skipEOL(addEOL);
 341     }
 342 
 343     /**
 344      * Test whether a char is valid JavaScript whitespace
 345      * @param ch a char
 346      * @return true if valid JavaScript whitespace
 347      */
 348     public static boolean isJSWhitespace(final char ch) {
 349         return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
 350     }
 351 
 352     /**
 353      * Test whether a char is valid JavaScript end of line
 354      * @param ch a char
 355      * @return true if valid JavaScript end of line
 356      */
 357     public static boolean isJSEOL(final char ch) {
 358         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
 359     }
 360 
 361     /**
 362      * Test whether a char is valid JSON whitespace
 363      * @param ch a char
 364      * @return true if valid JSON whitespace
 365      */
 366     public static boolean isJsonWhitespace(final char ch) {
 367         return JSON_WHITESPACE.indexOf(ch) != -1;
 368     }
 369 
 370     /**
 371      * Test whether a char is valid JSON end of line
 372      * @param ch a char
 373      * @return true if valid JSON end of line
 374      */
 375     public static boolean isJsonEOL(final char ch) {
 376         return JSON_WHITESPACE_EOL.indexOf(ch) != -1;
 377     }
 378 
 379     /**
 380      * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
 381      * strings ('`') in scripting mode.
 382      * @param ch a char
 383      * @return true if string delimiter
 384      */
 385     protected boolean isStringDelimiter(final char ch) {
 386         return ch == '\'' || ch == '"' || (scripting && ch == '`');
 387     }
 388 
 389     /**
 390      * Test whether a char is valid JavaScript whitespace
 391      * @param ch a char
 392      * @return true if valid JavaScript whitespace
 393      */
 394     protected boolean isWhitespace(final char ch) {
 395         return Lexer.isJSWhitespace(ch);
 396     }
 397 
 398     /**
 399      * Test whether a char is valid JavaScript end of line
 400      * @param ch a char
 401      * @return true if valid JavaScript end of line
 402      */
 403     protected boolean isEOL(final char ch) {
 404         return Lexer.isJSEOL(ch);
 405     }
 406 
 407     /**
 408      * Skip over whitespace and detect end of line, adding EOL tokens if
 409      * encountered.
 410      *
 411      * @param addEOL true if EOL tokens should be recorded.
 412      */
 413     private void skipWhitespace(final boolean addEOL) {
 414         while (isWhitespace(ch0)) {
 415             if (isEOL(ch0)) {
 416                 skipEOL(addEOL);
 417             } else {
 418                 skip(1);
 419             }
 420         }
 421     }
 422 
 423     /**
 424      * Skip over comments.
 425      *
 426      * @return True if a comment.
 427      */
 428     protected boolean skipComments() {
 429         // Save the current position.
 430         final int start = position;
 431 
 432         if (ch0 == '/') {
 433             // Is it a // comment.
 434             if (ch1 == '/') {
 435                 // Skip over //.
 436                 skip(2);
 437                 // Scan for EOL.
 438                 while (!atEOF() && !isEOL(ch0)) {
 439                     skip(1);
 440                 }
 441                 // Did detect a comment.
 442                 add(COMMENT, start);
 443                 return true;
 444             } else if (ch1 == '*') {
 445                 // Skip over /*.
 446                 skip(2);
 447                 // Scan for */.
 448                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
 449                     // If end of line handle else skip character.
 450                     if (isEOL(ch0)) {
 451                         skipEOL(true);
 452                     } else {
 453                         skip(1);
 454                     }
 455                 }
 456 
 457                 if (atEOF()) {
 458                     // TODO - Report closing */ missing in parser.
 459                     add(ERROR, start);
 460                 } else {
 461                     // Skip */.
 462                     skip(2);
 463                 }
 464 
 465                 // Did detect a comment.
 466                 add(COMMENT, start);
 467                 return true;
 468             }
 469         } else if (ch0 == '#') {
 470             assert scripting;
 471             // shell style comment
 472             // Skip over #.
 473             skip(1);
 474             // Scan for EOL.
 475             while (!atEOF() && !isEOL(ch0)) {
 476                 skip(1);
 477             }
 478             // Did detect a comment.
 479             add(COMMENT, start);
 480             return true;
 481         }
 482 
 483         // Not a comment.
 484         return false;
 485     }
 486 
 487     /**
 488      * Convert a regex token to a token object.
 489      *
 490      * @param start  Position in source content.
 491      * @param length Length of regex token.
 492      * @return Regex token object.
 493      */
 494     public RegexToken valueOfPattern(final int start, final int length) {
 495         // Save the current position.
 496         final int savePosition = position;
 497         // Reset to beginning of content.
 498         reset(start);
 499         // Buffer for recording characters.
 500         final StringBuilder sb = new StringBuilder(length);
 501 
 502         // Skip /.
 503         skip(1);
 504         boolean inBrackets = false;
 505         // Scan for closing /, stopping at end of line.
 506         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
 507             // Skip over escaped character.
 508             if (ch0 == '\\') {
 509                 sb.append(ch0);
 510                 sb.append(ch1);
 511                 skip(2);
 512             } else {
 513                 if (ch0 == '[') {
 514                     inBrackets = true;
 515                 } else if (ch0 == ']') {
 516                     inBrackets = false;
 517                 }
 518 
 519                 // Skip literal character.
 520                 sb.append(ch0);
 521                 skip(1);
 522             }
 523         }
 524 
 525         // Get pattern as string.
 526         final String regex = sb.toString();
 527 
 528         // Skip /.
 529         skip(1);
 530 
 531         // Options as string.
 532         final String options = source.getString(position, scanIdentifier());
 533 
 534         reset(savePosition);
 535 
 536         // Compile the pattern.
 537         return new RegexToken(regex, options);
 538     }
 539 
 540     /**
 541      * Return true if the given token can be the beginning of a literal.
 542      *
 543      * @param token a token
 544      * @return true if token can start a literal.
 545      */
 546     public boolean canStartLiteral(final TokenType token) {
 547         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
 548     }
 549 
 550     /**
 551      * interface to receive line information for multi-line literals.
 552      */
 553     protected interface LineInfoReceiver {
 554         /**
 555          * Receives line information
 556          * @param line last line number
 557          * @param linePosition position of last line
 558          */
 559         public void lineInfo(int line, int linePosition);
 560     }
 561 
 562     /**
 563      * Check whether the given token represents the beginning of a literal. If so scan
 564      * the literal and return <tt>true</tt>, otherwise return false.
 565      *
 566      * @param token the token.
 567      * @param startTokenType the token type.
 568      * @param lir LineInfoReceiver that receives line info for multi-line string literals.
 569      * @return True if a literal beginning with startToken was found and scanned.
 570      */
 571     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
 572         // Check if it can be a literal.
 573         if (!canStartLiteral(startTokenType)) {
 574             return false;
 575         }
 576         // We break on ambiguous tokens so if we already moved on it can't be a literal.
 577         if (stream.get(stream.last()) != token) {
 578             return false;
 579         }
 580         // Rewind to token start position
 581         reset(Token.descPosition(token));
 582 
 583         if (ch0 == '/') {
 584             return scanRegEx();
 585         } else if (ch0 == '<') {
 586             if (ch1 == '<') {
 587                 return scanHereString(lir);
 588             } else if (Character.isJavaIdentifierStart(ch1)) {
 589                 return scanXMLLiteral();
 590             }
 591         }
 592 
 593         return false;
 594     }
 595 
 596     /**
 597      * Scan over regex literal.
 598      *
 599      * @return True if a regex literal.
 600      */
 601     private boolean scanRegEx() {
 602         assert ch0 == '/';
 603         // Make sure it's not a comment.
 604         if (ch1 != '/' && ch1 != '*') {
 605             // Record beginning of literal.
 606             final int start = position;
 607             // Skip /.
 608             skip(1);
 609             boolean inBrackets = false;
 610 
 611             // Scan for closing /, stopping at end of line.
 612             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
 613                 // Skip over escaped character.
 614                 if (ch0 == '\\') {
 615                     skip(1);
 616                     if (isEOL(ch0)) {
 617                         reset(start);
 618                         return false;
 619                     }
 620                     skip(1);
 621                 } else {
 622                     if (ch0 == '[') {
 623                         inBrackets = true;
 624                     } else if (ch0 == ']') {
 625                         inBrackets = false;
 626                     }
 627 
 628                     // Skip literal character.
 629                     skip(1);
 630                 }
 631             }
 632 
 633             // If regex literal.
 634             if (ch0 == '/') {
 635                 // Skip /.
 636                 skip(1);
 637 
 638                 // Skip over options.
 639                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
 640                     skip(1);
 641                 }
 642 
 643                 // Add regex token.
 644                 add(REGEX, start);
 645                 // Regex literal detected.
 646                 return true;
 647             }
 648 
 649             // False start try again.
 650             reset(start);
 651         }
 652 
 653         // Regex literal not detected.
 654         return false;
 655     }
 656 
 657     /**
 658      * Convert a digit to a integer.  Can't use Character.digit since we are
 659      * restricted to ASCII by the spec.
 660      *
 661      * @param ch   Character to convert.
 662      * @param base Numeric base.
 663      *
 664      * @return The converted digit or -1 if invalid.
 665      */
 666     protected static int convertDigit(final char ch, final int base) {
 667         int digit;
 668 
 669         if ('0' <= ch && ch <= '9') {
 670             digit = ch - '0';
 671         } else if ('A' <= ch && ch <= 'Z') {
 672             digit = ch - 'A' + 10;
 673         } else if ('a' <= ch && ch <= 'z') {
 674             digit = ch - 'a' + 10;
 675         } else {
 676             return -1;
 677         }
 678 
 679         return digit < base ? digit : -1;
 680     }
 681 
 682 
 683     /**
 684      * Get the value of a hexadecimal numeric sequence.
 685      *
 686      * @param length Number of digits.
 687      * @param type   Type of token to report against.
 688      * @return Value of sequence or < 0 if no digits.
 689      */
 690     private int hexSequence(final int length, final TokenType type) {
 691         int value = 0;
 692 
 693         for (int i = 0; i < length; i++) {
 694             final int digit = convertDigit(ch0, 16);
 695 
 696             if (digit == -1) {
 697                 error(Lexer.message("invalid.hex"), type, position, limit);
 698                 return i == 0 ? -1 : value;
 699             }
 700 
 701             value = digit | value << 4;
 702             skip(1);
 703         }
 704 
 705         return value;
 706     }
 707 
 708     /**
 709      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
 710      *
 711      * @return Value of sequence.
 712      */
 713     private int octalSequence() {
 714         int value = 0;
 715 
 716         for (int i = 0; i < 3; i++) {
 717             final int digit = convertDigit(ch0, 8);
 718 
 719             if (digit == -1) {
 720                 break;
 721             }
 722             value = digit | value << 3;
 723             skip(1);
 724 
 725             if (i == 1 && value >= 32) {
 726                 break;
 727             }
 728         }
 729         return value;
 730     }
 731 
 732     /**
 733      * Convert a string to a JavaScript identifier.
 734      *
 735      * @param start  Position in source content.
 736      * @param length Length of token.
 737      * @return Ident string or null if an error.
 738      */
 739     private String valueOfIdent(final int start, final int length) throws RuntimeException {
 740         // Save the current position.
 741         final int savePosition = position;
 742         // End of scan.
 743         final int end = start + length;
 744         // Reset to beginning of content.
 745         reset(start);
 746         // Buffer for recording characters.
 747         final StringBuilder sb = new StringBuilder(length);
 748 
 749         // Scan until end of line or end of file.
 750         while (!atEOF() && position < end && !isEOL(ch0)) {
 751             // If escape character.
 752             if (ch0 == '\\' && ch1 == 'u') {
 753                 skip(2);
 754                 final int ch = hexSequence(4, TokenType.IDENT);
 755                 if (isWhitespace((char)ch)) {
 756                     return null;
 757                 }
 758                 if (ch < 0) {
 759                     sb.append('\\');
 760                     sb.append('u');
 761                 } else {
 762                     sb.append((char)ch);
 763                 }
 764             } else {
 765                 // Add regular character.
 766                 sb.append(ch0);
 767                 skip(1);
 768             }
 769         }
 770 
 771         // Restore position.
 772         reset(savePosition);
 773 
 774         return sb.toString();
 775     }
 776 
 777     /**
 778      * Scan over and identifier or keyword. Handles identifiers containing
 779      * encoded Unicode chars.
 780      *
 781      * Example:
 782      *
 783      * var \u0042 = 44;
 784      */
 785     private void scanIdentifierOrKeyword() {
 786         // Record beginning of identifier.
 787         final int start = position;
 788         // Scan identifier.
 789         final int length = scanIdentifier();
 790         // Check to see if it is a keyword.
 791         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
 792         // Add keyword or identifier token.
 793         add(type, start);
 794     }
 795 
 796     /**
 797      * Convert a string to a JavaScript string object.
 798      *
 799      * @param start  Position in source content.
 800      * @param length Length of token.
 801      * @return JavaScript string object.
 802      */
 803     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
 804         // Save the current position.
 805         final int savePosition = position;
 806         // Calculate the end position.
 807         final int end = start + length;
 808         // Reset to beginning of string.
 809         reset(start);
 810 
 811         // Buffer for recording characters.
 812         final StringBuilder sb = new StringBuilder(length);
 813 
 814         // Scan until end of string.
 815         while (position < end) {
 816             // If escape character.
 817             if (ch0 == '\\') {
 818                 skip(1);
 819 
 820                 final char next = ch0;
 821                 final int afterSlash = position;
 822 
 823                 skip(1);
 824 
 825                 // Special characters.
 826                 switch (next) {
 827                 case '0':
 828                 case '1':
 829                 case '2':
 830                 case '3':
 831                 case '4':
 832                 case '5':
 833                 case '6':
 834                 case '7': {
 835                     if (strict) {
 836                         // "\0" itself is allowed in strict mode. Only other 'real'
 837                         // octal escape sequences are not allowed (eg. "\02", "\31").
 838                         // See section 7.8.4 String literals production EscapeSequence
 839                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
 840                             error(Lexer.message("strict.no.octal"), STRING, position, limit);
 841                         }
 842                     }
 843                     reset(afterSlash);
 844                     // Octal sequence.
 845                     final int ch = octalSequence();
 846 
 847                     if (ch < 0) {
 848                         sb.append('\\');
 849                         sb.append('x');
 850                     } else {
 851                         sb.append((char)ch);
 852                     }
 853                     break;
 854                 }
 855                 case 'n':
 856                     sb.append('\n');
 857                     break;
 858                 case 't':
 859                     sb.append('\t');
 860                     break;
 861                 case 'b':
 862                     sb.append('\b');
 863                     break;
 864                 case 'f':
 865                     sb.append('\f');
 866                     break;
 867                 case 'r':
 868                     sb.append('\r');
 869                     break;
 870                 case '\'':
 871                     sb.append('\'');
 872                     break;
 873                 case '\"':
 874                     sb.append('\"');
 875                     break;
 876                 case '\\':
 877                     sb.append('\\');
 878                     break;
 879                 case '\r': // CR | CRLF
 880                     if (ch0 == '\n') {
 881                         skip(1);
 882                     }
 883                     // fall through
 884                 case '\n': // LF
 885                 case '\u2028': // LS
 886                 case '\u2029': // PS
 887                     // continue on the next line, slash-return continues string
 888                     // literal
 889                     break;
 890                 case 'x': {
 891                     // Hex sequence.
 892                     final int ch = hexSequence(2, STRING);
 893 
 894                     if (ch < 0) {
 895                         sb.append('\\');
 896                         sb.append('x');
 897                     } else {
 898                         sb.append((char)ch);
 899                     }
 900                 }
 901                     break;
 902                 case 'u': {
 903                     // Unicode sequence.
 904                     final int ch = hexSequence(4, STRING);
 905 
 906                     if (ch < 0) {
 907                         sb.append('\\');
 908                         sb.append('u');
 909                     } else {
 910                         sb.append((char)ch);
 911                     }
 912                 }
 913                     break;
 914                 case 'v':
 915                     sb.append('\u000B');
 916                     break;
 917                 // All other characters.
 918                 default:
 919                     sb.append(next);
 920                     break;
 921                 }
 922             } else {
 923                 // Add regular character.
 924                 sb.append(ch0);
 925                 skip(1);
 926             }
 927         }
 928 
 929         // Restore position.
 930         reset(savePosition);
 931 
 932         return sb.toString();
 933     }
 934 
 935     /**
 936      * Scan over a string literal.
     * @param add true if we are not just scanning but should actually modify the token stream
 938      */
 939     protected void scanString(final boolean add) {
 940         // Type of string.
 941         TokenType type = STRING;
 942         // Record starting quote.
 943         final char quote = ch0;
 944         // Skip over quote.
 945         skip(1);
 946 
 947         // Record beginning of string content.
 948         final State stringState = saveState();
 949 
 950         // Scan until close quote or end of line.
 951         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
 952             // Skip over escaped character.
 953             if (ch0 == '\\') {
 954                 type = ESCSTRING;
 955                 skip(1);
 956                 if (! isEscapeCharacter(ch0)) {
 957                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);
 958                 }
 959                 if (isEOL(ch0)) {
 960                     // Multiline string literal
 961                     skipEOL(false);
 962                     continue;
 963                 }
 964             }
 965             // Skip literal character.
 966             skip(1);
 967         }
 968 
 969         // If close quote.
 970         if (ch0 == quote) {
 971             // Skip close quote.
 972             skip(1);
 973         } else {
 974             error(Lexer.message("missing.close.quote"), STRING, position, limit);
 975         }
 976 
 977         // If not just scanning.
 978         if (add) {
 979             // Record end of string.
 980             stringState.setLimit(position - 1);
 981 
 982             if (scripting && !stringState.isEmpty()) {
 983                 switch (quote) {
 984                 case '`':
 985                     // Mark the beginning of an exec string.
 986                     add(EXECSTRING, stringState.position, stringState.limit);
 987                     // Frame edit string with left brace.
 988                     add(LBRACE, stringState.position, stringState.position);
 989                     // Process edit string.
 990                     editString(type, stringState);
 991                     // Frame edit string with right brace.
 992                     add(RBRACE, stringState.limit, stringState.limit);
 993                     break;
 994                 case '"':
 995                     // Only edit double quoted strings.
 996                     editString(type, stringState);
 997                     break;
 998                 case '\'':
 999                     // Add string token without editing.
1000                     add(type, stringState.position, stringState.limit);
1001                     break;
1002                 default:
1003                     break;
1004                 }
1005             } else {
1006                 /// Add string token without editing.
1007                 add(type, stringState.position, stringState.limit);
1008             }
1009         }
1010     }
1011 
1012     /**
1013      * Is the given character a valid escape char after "\" ?
1014      *
1015      * @param ch character to be checked
1016      * @return if the given character is valid after "\"
1017      */
    protected boolean isEscapeCharacter(final char ch) {
        // This implementation accepts every character after a backslash, so the
        // "invalid.escape.char" error in scanString is never raised through it.
        // NOTE(review): the method is protected and non-final, presumably so that a
        // subclass can narrow the accepted set - confirm against subclasses.
        return true;
    }
1021 
1022     /**
1023      * Convert string to number.
1024      *
1025      * @param valueString  String to convert.
1026      * @param radix        Numeric base.
1027      * @return Converted number.
1028      */
1029     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1030         try {
1031             final long value = Long.parseLong(valueString, radix);
1032             if(value >= MIN_INT_L && value <= MAX_INT_L) {
1033                 return Integer.valueOf((int)value);
1034             }
1035             return Long.valueOf(value);
1036         } catch (final NumberFormatException e) {
1037             if (radix == 10) {
1038                 return Double.valueOf(valueString);
1039             }
1040 
1041             double value = 0.0;
1042 
1043             for (int i = 0; i < valueString.length(); i++) {
1044                 final char ch = valueString.charAt(i);
1045                 // Preverified, should always be a valid digit.
1046                 final int digit = convertDigit(ch, radix);
1047                 value *= radix;
1048                 value += digit;
1049             }
1050 
1051             return value;
1052         }
1053     }
1054 
1055     /**
1056      * Scan a number.
1057      */
1058     protected void scanNumber() {
1059         // Record beginning of number.
1060         final int start = position;
1061         // Assume value is a decimal.
1062         TokenType type = DECIMAL;
1063 
1064         // First digit of number.
1065         int digit = convertDigit(ch0, 10);
1066 
1067         // If number begins with 0x.
1068         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1069             // Skip over 0xN.
1070             skip(3);
1071             // Skip over remaining digits.
1072             while (convertDigit(ch0, 16) != -1) {
1073                 skip(1);
1074             }
1075 
1076             type = HEXADECIMAL;
1077         } else {
1078             // Check for possible octal constant.
1079             boolean octal = digit == 0;
1080             // Skip first digit if not leading '.'.
1081             if (digit != -1) {
1082                 skip(1);
1083             }
1084 
1085             // Skip remaining digits.
1086             while ((digit = convertDigit(ch0, 10)) != -1) {
1087                 // Check octal only digits.
1088                 octal = octal && digit < 8;
1089                 // Skip digit.
1090                 skip(1);
1091             }
1092 
1093             if (octal && position - start > 1) {
1094                 type = OCTAL;
1095             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1096                 // Must be a double.
1097                 if (ch0 == '.') {
1098                     // Skip period.
1099                     skip(1);
1100                     // Skip mantissa.
1101                     while (convertDigit(ch0, 10) != -1) {
1102                         skip(1);
1103                     }
1104                 }
1105 
1106                 // Detect exponent.
1107                 if (ch0 == 'E' || ch0 == 'e') {
1108                     // Skip E.
1109                     skip(1);
1110                     // Detect and skip exponent sign.
1111                     if (ch0 == '+' || ch0 == '-') {
1112                         skip(1);
1113                     }
1114                     // Skip exponent.
1115                     while (convertDigit(ch0, 10) != -1) {
1116                         skip(1);
1117                     }
1118                 }
1119 
1120                 type = FLOATING;
1121             }
1122         }
1123 
1124         if (Character.isJavaIdentifierStart(ch0)) {
1125             error(Lexer.message("missing.space.after.number"), type, position, 1);
1126         }
1127 
1128         // Add number token.
1129         add(type, start);
1130     }
1131 
    /**
     * Convert an XML token to a token object.
     *
     * @param start  Position in source content.
     * @param length Length of XML token.
     * @return XML token object.
     */
1139     XMLToken valueOfXML(final int start, final int length) {
1140         return new XMLToken(source.getString(start, length));
1141     }
1142 
1143     /**
     * Scan over an XML token.
1145      *
1146      * @return TRUE if is an XML literal.
1147      */
1148     private boolean scanXMLLiteral() {
1149         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1150         if (XML_LITERALS) {
1151             // Record beginning of xml expression.
1152             final int start = position;
1153 
1154             int openCount = 0;
1155 
1156             do {
1157                 if (ch0 == '<') {
1158                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1159                         skip(3);
1160                         openCount--;
1161                     } else if (Character.isJavaIdentifierStart(ch1)) {
1162                         skip(2);
1163                         openCount++;
1164                     } else if (ch1 == '?') {
1165                         skip(2);
1166                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1167                         skip(4);
1168                     } else {
1169                         reset(start);
1170                         return false;
1171                     }
1172 
1173                     while (!atEOF() && ch0 != '>') {
1174                         if (ch0 == '/' && ch1 == '>') {
1175                             openCount--;
1176                             skip(1);
1177                             break;
1178                         } else if (ch0 == '\"' || ch0 == '\'') {
1179                             scanString(false);
1180                         } else {
1181                             skip(1);
1182                         }
1183                     }
1184 
1185                     if (ch0 != '>') {
1186                         reset(start);
1187                         return false;
1188                     }
1189 
1190                     skip(1);
1191                 } else if (atEOF()) {
1192                     reset(start);
1193                     return false;
1194                 } else {
1195                     skip(1);
1196                 }
1197             } while (openCount > 0);
1198 
1199             add(XML, start);
1200             return true;
1201         }
1202 
1203         return false;
1204     }
1205 
1206     /**
1207      * Scan over identifier characters.
1208      *
1209      * @return Length of identifier or zero if none found.
1210      */
1211     private int scanIdentifier() {
1212         final int start = position;
1213 
1214         // Make sure first character is valid start character.
1215         if (ch0 == '\\' && ch1 == 'u') {
1216             skip(2);
1217             final int ch = hexSequence(4, TokenType.IDENT);
1218 
1219             if (!Character.isJavaIdentifierStart(ch)) {
1220                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1221             }
1222         } else if (!Character.isJavaIdentifierStart(ch0)) {
1223             // Not an identifier.
1224             return 0;
1225         }
1226 
1227         // Make sure remaining characters are valid part characters.
1228         while (!atEOF()) {
1229             if (ch0 == '\\' && ch1 == 'u') {
1230                 skip(2);
1231                 final int ch = hexSequence(4, TokenType.IDENT);
1232 
1233                 if (!Character.isJavaIdentifierPart(ch)) {
1234                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1235                 }
1236             } else if (Character.isJavaIdentifierPart(ch0)) {
1237                 skip(1);
1238             } else {
1239                 break;
1240             }
1241         }
1242 
1243         // Length of identifier sequence.
1244         return position - start;
1245     }
1246 
1247     /**
1248      * Compare two identifiers (in content) for equality.
1249      *
1250      * @param aStart  Start of first identifier.
1251      * @param aLength Length of first identifier.
1252      * @param bStart  Start of second identifier.
1253      * @param bLength Length of second identifier.
1254      * @return True if equal.
1255      */
1256     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1257         if (aLength == bLength) {
1258             for (int i = 0; i < aLength; i++) {
1259                 if (content[aStart + i] != content[bStart + i]) {
1260                     return false;
1261                 }
1262             }
1263 
1264             return true;
1265         }
1266 
1267         return false;
1268     }
1269 
1270     /**
1271      * Detect if a line starts with a marker identifier.
1272      *
1273      * @param identStart  Start of identifier.
1274      * @param identLength Length of identifier.
1275      * @return True if detected.
1276      */
1277     private boolean hasHereMarker(final int identStart, final int identLength) {
1278         // Skip any whitespace.
1279         skipWhitespace(false);
1280 
1281         return identifierEqual(identStart, identLength, position, scanIdentifier());
1282     }
1283 
1284     /**
1285      * Lexer to service edit strings.
1286      */
1287     private static class EditStringLexer extends Lexer {
1288         /** Type of string literals to emit. */
1289         final TokenType stringType;
1290 
1291         /*
1292          * Constructor.
1293          */
1294 
1295         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1296             super(lexer, stringState);
1297 
1298             this.stringType = stringType;
1299         }
1300 
1301         /**
1302          * Lexify the contents of the string.
1303          */
1304         @Override
1305         public void lexify() {
1306             // Record start of string position.
1307             int stringStart = position;
1308             // Indicate that the priming first string has not been emitted.
1309             boolean primed = false;
1310 
1311             while (true) {
1312                 // Detect end of content.
1313                 if (atEOF()) {
1314                     break;
1315                 }
1316 
1317                 // Honour escapes (should be well formed.)
1318                 if (ch0 == '\\' && stringType == ESCSTRING) {
1319                     skip(2);
1320 
1321                     continue;
1322                 }
1323 
1324                 // If start of expression.
1325                 if (ch0 == '$' && ch1 == '{') {
1326                     if (!primed || stringStart != position) {
1327                         if (primed) {
1328                             add(ADD, stringStart, stringStart + 1);
1329                         }
1330 
1331                         add(stringType, stringStart, position);
1332                         primed = true;
1333                     }
1334 
1335                     // Skip ${
1336                     skip(2);
1337 
1338                     // Save expression state.
1339                     final State expressionState = saveState();
1340 
1341                     // Start with one open brace.
1342                     int braceCount = 1;
1343 
1344                     // Scan for the rest of the string.
1345                     while (!atEOF()) {
1346                         // If closing brace.
1347                         if (ch0 == '}') {
1348                             // Break only only if matching brace.
1349                             if (--braceCount == 0) {
1350                                 break;
1351                             }
1352                         } else if (ch0 == '{') {
1353                             // Bump up the brace count.
1354                             braceCount++;
1355                         }
1356 
1357                         // Skip to next character.
1358                         skip(1);
1359                     }
1360 
1361                     // If braces don't match then report an error.
1362                     if (braceCount != 0) {
1363                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1364                     }
1365 
1366                     // Mark end of expression.
1367                     expressionState.setLimit(position);
1368                     // Skip closing brace.
1369                     skip(1);
1370 
1371                     // Start next string.
1372                     stringStart = position;
1373 
1374                     // Concatenate expression.
1375                     add(ADD, expressionState.position, expressionState.position + 1);
1376                     add(LPAREN, expressionState.position, expressionState.position + 1);
1377 
1378                     // Scan expression.
1379                     final Lexer lexer = new Lexer(this, expressionState);
1380                     lexer.lexify();
1381 
1382                     // Close out expression parenthesis.
1383                     add(RPAREN, position - 1, position);
1384 
1385                     continue;
1386                 }
1387 
1388                 // Next character in string.
1389                 skip(1);
1390             }
1391 
1392             // If there is any unemitted string portion.
1393             if (stringStart != limit) {
1394                 // Concatenate remaining string.
1395                 if (primed) {
1396                     add(ADD, stringStart, 1);
1397                 }
1398 
1399                 add(stringType, stringStart, limit);
1400             }
1401         }
1402 
1403     }
1404 
1405     /**
1406      * Edit string for nested expressions.
1407      *
1408      * @param stringType  Type of string literals to emit.
1409      * @param stringState State of lexer at start of string.
1410      */
1411     private void editString(final TokenType stringType, final State stringState) {
1412         // Use special lexer to scan string.
1413         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1414         lexer.lexify();
1415 
1416         // Need to keep lexer informed.
1417         last = stringType;
1418     }
1419 
    /**
     * Scan over a here string.
     *
     * @param lir receiver that is told the line number and position of the last line scanned.
     * @return TRUE if is a here string.
     */
1425     private boolean scanHereString(final LineInfoReceiver lir) {
1426         assert ch0 == '<' && ch1 == '<';
1427         if (scripting) {
1428             // Record beginning of here string.
1429             final State saved = saveState();
1430 
1431             // << or <<<
1432             final boolean excludeLastEOL = ch2 != '<';
1433 
1434             if (excludeLastEOL) {
1435                 skip(2);
1436             } else {
1437                 skip(3);
1438             }
1439 
1440             // Scan identifier.
1441             final int identStart = position;
1442             final int identLength = scanIdentifier();
1443 
1444             // Check for identifier.
1445             if (identLength == 0) {
1446                 // Treat as shift.
1447                 restoreState(saved);
1448 
1449                 return false;
1450             }
1451 
1452             // Record rest of line.
1453             final State restState = saveState();
1454             // keep line number updated
1455             int lastLine = line;
1456 
1457             skipLine(false);
1458             lastLine++;
1459             int lastLinePosition = position;
1460             restState.setLimit(position);
1461 
1462             // Record beginning of string.
1463             final State stringState = saveState();
1464             int stringEnd = position;
1465 
1466             // Hunt down marker.
1467             while (!atEOF()) {
1468                 // Skip any whitespace.
1469                 skipWhitespace(false);
1470 
1471                 if (hasHereMarker(identStart, identLength)) {
1472                     break;
1473                 }
1474 
1475                 skipLine(false);
1476                 lastLine++;
1477                 lastLinePosition = position;
1478                 stringEnd = position;
1479             }
1480 
1481             // notify last line information
1482             lir.lineInfo(lastLine, lastLinePosition);
1483 
1484             // Record end of string.
1485             stringState.setLimit(stringEnd);
1486 
1487             // If marker is missing.
1488             if (stringState.isEmpty() || atEOF()) {
1489                 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1490                 restoreState(saved);
1491 
1492                 return false;
1493             }
1494 
1495             // Remove last end of line if specified.
1496             if (excludeLastEOL) {
1497                 // Handles \n.
1498                 if (content[stringEnd - 1] == '\n') {
1499                     stringEnd--;
1500                 }
1501 
1502                 // Handles \r and \r\n.
1503                 if (content[stringEnd - 1] == '\r') {
1504                     stringEnd--;
1505                 }
1506 
1507                 // Update end of string.
1508                 stringState.setLimit(stringEnd);
1509             }
1510 
1511             // Edit string if appropriate.
1512             if (scripting && !stringState.isEmpty()) {
1513                 editString(STRING, stringState);
1514             } else {
1515                 // Add here string.
1516                 add(STRING, stringState.position, stringState.limit);
1517             }
1518 
1519             // Scan rest of original line.
1520             final Lexer restLexer = new Lexer(this, restState);
1521 
1522             restLexer.lexify();
1523 
1524             return true;
1525         }
1526 
1527         return false;
1528     }
1529 
1530     /**
1531      * Breaks source content down into lex units, adding tokens to the token
1532      * stream. The routine scans until the stream buffer is full. Can be called
1533      * repeatedly until EOF is detected.
1534      */
1535     public void lexify() {
1536         while (!stream.isFull() || nested) {
1537             // Skip over whitespace.
1538             skipWhitespace(true);
1539 
1540             // Detect end of file.
1541             if (atEOF()) {
1542                 if (!nested) {
1543                     // Add an EOF token at the end.
1544                     add(EOF, position);
1545                 }
1546 
1547                 break;
1548             }
1549 
1550             // Check for comments. Note that we don't scan for regexp and other literals here as
1551             // we may not have enough context to distinguish them from similar looking operators.
1552             // Instead we break on ambiguous operators below and let the parser decide.
1553             if (ch0 == '/' && skipComments()) {
1554                 continue;
1555             }
1556 
1557             if (scripting && ch0 == '#' && skipComments()) {
1558                 continue;
1559             }
1560 
1561             // TokenType for lookup of delimiter or operator.
1562             TokenType type;
1563 
1564             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1565                 // '.' followed by digit.
1566                 // Scan and add a number.
1567                 scanNumber();
1568             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1569                 // Get the number of characters in the token.
1570                 final int typeLength = type.getLength();
1571                 // Skip that many characters.
1572                 skip(typeLength);
1573                 // Add operator token.
1574                 add(type, position - typeLength);
1575                 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1576                 // We break to let the parser decide what it is.
1577                 if (canStartLiteral(type)) {
1578                     break;
1579                 }
1580             } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1581                 // Scan and add identifier or keyword.
1582                 scanIdentifierOrKeyword();
1583             } else if (isStringDelimiter(ch0)) {
1584                 // Scan and add a string.
1585                 scanString(true);
1586             } else if (Character.isDigit(ch0)) {
1587                 // Scan and add a number.
1588                 scanNumber();
1589             } else {
1590                 // Don't recognize this character.
1591                 skip(1);
1592                 add(ERROR, position - 1);
1593             }
1594         }
1595     }
1596 
    /**
     * Return value of token given its token descriptor.
     *
     * @param token  Token descriptor.
     * @param strict Passed on to valueOfString when the token is an ESCSTRING.
     * @return JavaScript value, or null for token types that carry no value.
     */
1603     Object getValueOf(final long token, final boolean strict) {
1604         final int start = Token.descPosition(token);
1605         final int len = Token.descLength(token);
1606 
1607         switch (Token.descType(token)) {
1608         case DECIMAL:
1609             return Lexer.valueOf(source.getString(start, len), 10); // number
1610         case OCTAL:
1611             return Lexer.valueOf(source.getString(start, len), 8); // number
1612         case HEXADECIMAL:
1613             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1614         case FLOATING:
1615             return Double.valueOf(source.getString(start, len)); // number
1616         case STRING:
1617             return source.getString(start, len); // String
1618         case ESCSTRING:
1619             return valueOfString(start, len, strict); // String
1620         case IDENT:
1621             return valueOfIdent(start, len); // String
1622         case REGEX:
1623             return valueOfPattern(start, len); // RegexToken::LexerToken
1624         case XML:
1625             return valueOfXML(start, len); // XMLToken::LexerToken
1626         default:
1627             break;
1628         }
1629 
1630         return null;
1631     }
1632 
1633     /**
     * Get the correctly localized error message for a given message id and format arguments
1635      * @param msgId message id
1636      * @param args  format arguments
1637      * @return message
1638      */
1639     protected static String message(final String msgId, final String... args) {
1640         return ECMAErrors.getMessage("lexer.error." + msgId, args);
1641     }
1642 
1643     /**
1644      * Generate a runtime exception
1645      *
1646      * @param message       error message
1647      * @param type          token type
1648      * @param start         start position of lexed error
1649      * @param length        length of lexed error
1650      * @throws ParserException  unconditionally
1651      */
1652     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1653         final long token     = Token.toDesc(type, start, length);
1654         final int  pos       = Token.descPosition(token);
1655         final int  lineNum   = source.getLine(pos);
1656         final int  columnNum = source.getColumn(pos);
1657         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1658         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1659     }
1660 
1661     /**
1662      * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1663      * This is the abstract superclass
1664      */
1665     public static abstract class LexerToken {
1666         private final String expression;
1667 
1668         /**
1669          * Constructor
1670          * @param expression token expression
1671          */
1672         protected LexerToken(final String expression) {
1673             this.expression = expression;
1674         }
1675 
1676         /**
1677          * Get the expression
1678          * @return expression
1679          */
1680         public String getExpression() {
1681             return expression;
1682         }
1683     }
1684 
1685     /**
1686      * Temporary container for regular expressions.
1687      */
1688     public static class RegexToken extends LexerToken {
1689         /** Options. */
1690         private final String options;
1691 
1692         /**
1693          * Constructor.
1694          *
1695          * @param expression  regexp expression
1696          * @param options     regexp options
1697          */
1698         public RegexToken(final String expression, final String options) {
1699             super(expression);
1700             this.options = options;
1701         }
1702 
1703         /**
1704          * Get regexp options
1705          * @return options
1706          */
1707         public String getOptions() {
1708             return options;
1709         }
1710 
1711         @Override
1712         public String toString() {
1713             return '/' + getExpression() + '/' + options;
1714         }
1715     }
1716 
1717     /**
1718      * Temporary container for XML expression.
1719      */
    public static class XMLToken extends LexerToken {

        /**
         * Constructor.
         *
         * @param expression  XML expression
         */
        public XMLToken(final String expression) {
            // No state of its own: the expression is handed to the LexerToken constructor.
            super(expression);
        }
    }
1731 }