New src/jdk/nashorn/internal/parser/Lexer.java

   1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.ADD;
  29 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
  30 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
  31 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
  32 import static jdk.nashorn.internal.parser.TokenType.EOF;
  33 import static jdk.nashorn.internal.parser.TokenType.EOL;
  34 import static jdk.nashorn.internal.parser.TokenType.ERROR;
  35 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  36 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
  37 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
  38 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
  39 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
  40 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
  41 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
  42 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  43 import static jdk.nashorn.internal.parser.TokenType.REGEX;
  44 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
  45 import static jdk.nashorn.internal.parser.TokenType.STRING;
  46 import static jdk.nashorn.internal.parser.TokenType.XML;
  47 
  48 import jdk.nashorn.internal.runtime.ECMAErrors;
  49 import jdk.nashorn.internal.runtime.ErrorManager;
  50 import jdk.nashorn.internal.runtime.JSErrorType;
  51 import jdk.nashorn.internal.runtime.ParserException;
  52 import jdk.nashorn.internal.runtime.Source;
  53 import jdk.nashorn.internal.runtime.options.Options;
  54 
  55 /**
  56  * Responsible for converting source content into a stream of tokens.
  57  *
  58  */
  59 @SuppressWarnings("fallthrough")
  60 public class Lexer extends Scanner {
  61     private static final long MIN_INT_L = Integer.MIN_VALUE;
  62     private static final long MAX_INT_L = Integer.MAX_VALUE;
  63 
  64     private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");
  65 
  66     /** Content source. */
  67     private final Source source;
  68 
  69     /** Buffered stream for tokens. */
  70     private final TokenStream stream;
  71 
  72     /** True if here and edit strings are supported. */
  73     private final boolean scripting;
  74 
  75     /** True if a nested scan. (scan to completion, no EOF.) */
  76     private final boolean nested;
  77 
  78     /** Pending new line number and position. */
  79     private int pendingLine;
  80 
  81     /** Position of last EOL + 1. */
  82     private int linePosition;
  83 
  84     /** Type of last token added. */
  85     private TokenType last;
  86 
  87     private static final String SPACETAB = " \t";  // ASCII space and tab
  88     private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)
  89 
  90     private static final String JSON_WHITESPACE_EOL = LFCR;
  91     private static final String JSON_WHITESPACE     = SPACETAB + LFCR;
  92 
  93     private static final String JAVASCRIPT_WHITESPACE_EOL =
  94         LFCR +
  95         "\u2028" + // line separator
  96         "\u2029"   // paragraph separator
  97         ;
  98     private static final String JAVASCRIPT_WHITESPACE =
  99         SPACETAB +
 100         JAVASCRIPT_WHITESPACE_EOL +
 101         "\u000b" + // tabulation line
 102         "\u000c" + // ff (ctrl-l)
 103         "\u00a0" + // Latin-1 space
 104         "\u1680" + // Ogham space mark
 105         "\u180e" + // separator, Mongolian vowel
 106         "\u2000" + // en quad
 107         "\u2001" + // em quad
 108         "\u2002" + // en space
 109         "\u2003" + // em space
 110         "\u2004" + // three-per-em space
 111         "\u2005" + // four-per-em space
 112         "\u2006" + // six-per-em space
 113         "\u2007" + // figure space
 114         "\u2008" + // punctuation space
 115         "\u2009" + // thin space
 116         "\u200a" + // hair space
 117         "\u202f" + // narrow no-break space
 118         "\u205f" + // medium mathematical space
 119         "\u3000" + // ideographic space
 120         "\ufeff"   // byte order mark
 121         ;
 122 
 123     private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
 124         "\\u000a" + // line feed
 125         "\\u000d" + // carriage return (ctrl-m)
 126         "\\u2028" + // line separator
 127         "\\u2029" + // paragraph separator
 128         "\\u0009" + // tab
 129         "\\u0020" + // ASCII space
 130         "\\u000b" + // tabulation line
 131         "\\u000c" + // ff (ctrl-l)
 132         "\\u00a0" + // Latin-1 space
 133         "\\u1680" + // Ogham space mark
 134         "\\u180e" + // separator, Mongolian vowel
 135         "\\u2000" + // en quad
 136         "\\u2001" + // em quad
 137         "\\u2002" + // en space
 138         "\\u2003" + // em space
 139         "\\u2004" + // three-per-em space
 140         "\\u2005" + // four-per-em space
 141         "\\u2006" + // six-per-em space
 142         "\\u2007" + // figure space
 143         "\\u2008" + // punctuation space
 144         "\\u2009" + // thin space
 145         "\\u200a" + // hair space
 146         "\\u202f" + // narrow no-break space
 147         "\\u205f" + // medium mathematical space
 148         "\\u3000" + // ideographic space
 149         "\\ufeff"   // byte order mark
 150         ;
 151 
 152     static String unicodeEscape(final char ch) {
 153         final StringBuilder sb = new StringBuilder();
 154 
 155         sb.append("\\u");
 156 
 157         final String hex = Integer.toHexString(ch);
 158         for (int i = hex.length(); i < 4; i++) {
 159             sb.append('0');
 160         }
 161         sb.append(hex);
 162 
 163         return sb.toString();
 164     }
 165 
 166     /**
 167      * Constructor
 168      *
 169      * @param source    the source
 170      * @param stream    the token stream to lex
 171      */
 172     public Lexer(final Source source, final TokenStream stream) {
 173         this(source, stream, false);
 174     }
 175 
 176     /**
 177      * Constructor
 178      *
 179      * @param source    the source
 180      * @param stream    the token stream to lex
 181      * @param scripting are we in scripting mode
 182      */
 183     public Lexer(final Source source, final TokenStream stream, final boolean scripting) {
 184         super(source.getContent(), 1, 0, source.getLength());
 185 
 186         this.source      = source;
 187         this.stream      = stream;
 188         this.scripting   = scripting;
 189         this.nested      = false;
 190         this.pendingLine = 1;
 191         this.last        = EOL;
 192     }
 193 
 194     private Lexer(final Lexer lexer, final State state) {
 195         super(lexer, state);
 196 
 197         source = lexer.source;
 198         stream = lexer.stream;
 199         scripting = lexer.scripting;
 200         nested = true;
 201 
 202         pendingLine = state.pendingLine;
 203         linePosition = state.linePosition;
 204         last = EOL;
 205     }
 206 
 207     static class State extends Scanner.State {
 208         /** Pending new line number and position. */
 209         public final int pendingLine;
 210 
 211         /** Position of last EOL + 1. */
 212         public final int linePosition;
 213 
 214         /** Type of last token added. */
 215         public final TokenType last;
 216 
 217         /*
 218          * Constructor.
 219          */
 220 
 221         State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
 222             super(position, limit, line);
 223 
 224             this.pendingLine = pendingLine;
 225             this.linePosition = linePosition;
 226             this.last = last;
 227         }
 228     }
 229 
 230     /**
 231      * Save the state of the scan.
 232      *
 233      * @return Captured state.
 234      */
 235     @Override
 236     State saveState() {
 237         return new State(position, limit, line, pendingLine, linePosition, last);
 238     }
 239 
 240     /**
 241      * Restore the state of the scan.
 242      *
 243      * @param state
 244      *            Captured state.
 245      */
 246     void restoreState(final State state) {
 247         super.restoreState(state);
 248 
 249         pendingLine = state.pendingLine;
 250         linePosition = state.linePosition;
 251         last = state.last;
 252     }
 253 
 254     /**
 255      * Add a new token to the stream.
 256      *
 257      * @param type
 258      *            Token type.
 259      * @param start
 260      *            Start position.
 261      * @param end
 262      *            End position.
 263      */
 264     protected void add(final TokenType type, final int start, final int end) {
 265         // Record last token.
 266         last = type;
 267 
 268         // Only emit the last EOL in a cluster.
 269         if (type == EOL) {
 270             pendingLine = end;
 271             linePosition = start;
 272         } else {
 273             // Write any pending EOL to stream.
 274             if (pendingLine != -1) {
 275                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
 276                 pendingLine = -1;
 277             }
 278 
 279             // Write token to stream.
 280             stream.put(Token.toDesc(type, start, end - start));
 281         }
 282     }
 283 
 284     /**
 285      * Add a new token to the stream.
 286      *
 287      * @param type
 288      *            Token type.
 289      * @param start
 290      *            Start position.
 291      */
 292     protected void add(final TokenType type, final int start) {
 293         add(type, start, position);
 294     }
 295 
 296     /**
 297      * Return the String of valid whitespace characters for regular
 298      * expressions in JavaScript
 299      * @return regexp whitespace string
 300      */
 301     public static String getWhitespaceRegExp() {
 302         return JAVASCRIPT_WHITESPACE_IN_REGEXP;
 303     }
 304 
 305     /**
 306      * Skip end of line.
 307      *
 308      * @param addEOL true if EOL token should be recorded.
 309      */
 310     private void skipEOL(final boolean addEOL) {
 311 
 312         if (ch0 == '\r') { // detect \r\n pattern
 313             skip(1);
 314             if (ch0 == '\n') {
 315                 skip(1);
 316             }
 317         } else { // all other space, ch0 is guaranteed to be EOL or \0
 318             skip(1);
 319         }
 320 
 321         // bump up line count
 322         line++;
 323 
 324         if (addEOL) {
 325             // Add an EOL token.
 326             add(EOL, position, line);
 327         }
 328     }
 329 
 330     /**
 331      * Skip over rest of line including end of line.
 332      *
 333      * @param addEOL true if EOL token should be recorded.
 334      */
 335     private void skipLine(final boolean addEOL) {
 336         // Ignore characters.
 337         while (!isEOL(ch0) && !atEOF()) {
 338             skip(1);
 339         }
 340         // Skip over end of line.
 341         skipEOL(addEOL);
 342     }
 343 
 344     /**
 345      * Test whether a char is valid JavaScript whitespace
 346      * @param ch a char
 347      * @return true if valid JavaScript whitespace
 348      */
 349     public static boolean isJSWhitespace(final char ch) {
 350         return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
 351     }
 352 
 353     /**
 354      * Test whether a char is valid JavaScript end of line
 355      * @param ch a char
 356      * @return true if valid JavaScript end of line
 357      */
 358     public static boolean isJSEOL(final char ch) {
 359         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
 360     }
 361 
 362     /**
 363      * Test whether a char is valid JSON whitespace
 364      * @param ch a char
 365      * @return true if valid JSON whitespace
 366      */
 367     public static boolean isJsonWhitespace(final char ch) {
 368         return JSON_WHITESPACE.indexOf(ch) != -1;
 369     }
 370 
 371     /**
 372      * Test whether a char is valid JSON end of line
 373      * @param ch a char
 374      * @return true if valid JSON end of line
 375      */
 376     public static boolean isJsonEOL(final char ch) {
 377         return JSON_WHITESPACE_EOL.indexOf(ch) != -1;
 378     }
 379 
 380     /**
 381      * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
 382      * strings ('`') in scripting mode.
 383      * @param ch a char
 384      * @return true if string delimiter
 385      */
 386     protected boolean isStringDelimiter(final char ch) {
 387         return ch == '\'' || ch == '"' || (scripting && ch == '`');
 388     }
 389 
 390     /**
 391      * Test whether a char is valid JavaScript whitespace
 392      * @param ch a char
 393      * @return true if valid JavaScript whitespace
 394      */
 395     protected boolean isWhitespace(final char ch) {
 396         return Lexer.isJSWhitespace(ch);
 397     }
 398 
 399     /**
 400      * Test whether a char is valid JavaScript end of line
 401      * @param ch a char
 402      * @return true if valid JavaScript end of line
 403      */
 404     protected boolean isEOL(final char ch) {
 405         return Lexer.isJSEOL(ch);
 406     }
 407 
 408     /**
 409      * Skip over whitespace and detect end of line, adding EOL tokens if
 410      * encountered.
 411      *
 412      * @param addEOL true if EOL tokens should be recorded.
 413      */
 414     private void skipWhitespace(final boolean addEOL) {
 415         while (isWhitespace(ch0)) {
 416             if (isEOL(ch0)) {
 417                 skipEOL(addEOL);
 418             } else {
 419                 skip(1);
 420             }
 421         }
 422     }
 423 
 424     /**
 425      * Skip over comments.
 426      *
 427      * @return True if a comment.
 428      */
 429     protected boolean skipComments() {
 430         // Save the current position.
 431         final int start = position;
 432 
 433         if (ch0 == '/') {
 434             // Is it a // comment.
 435             if (ch1 == '/') {
 436                 // Skip over //.
 437                 skip(2);
 438 
 439                 boolean directiveComment = false;
 440                 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
 441                     directiveComment = true;
 442                 }
 443 
 444                 // Scan for EOL.
 445                 while (!atEOF() && !isEOL(ch0)) {
 446                     skip(1);
 447                 }
 448                 // Did detect a comment.
 449                 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
 450                 return true;
 451             } else if (ch1 == '*') {
 452                 // Skip over /*.
 453                 skip(2);
 454                 // Scan for */.
 455                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
 456                     // If end of line handle else skip character.
 457                     if (isEOL(ch0)) {
 458                         skipEOL(true);
 459                     } else {
 460                         skip(1);
 461                     }
 462                 }
 463 
 464                 if (atEOF()) {
 465                     // TODO - Report closing */ missing in parser.
 466                     add(ERROR, start);
 467                 } else {
 468                     // Skip */.
 469                     skip(2);
 470                 }
 471 
 472                 // Did detect a comment.
 473                 add(COMMENT, start);
 474                 return true;
 475             }
 476         } else if (ch0 == '#') {
 477             assert scripting;
 478             // shell style comment
 479             // Skip over #.
 480             skip(1);
 481             // Scan for EOL.
 482             while (!atEOF() && !isEOL(ch0)) {
 483                 skip(1);
 484             }
 485             // Did detect a comment.
 486             add(COMMENT, start);
 487             return true;
 488         }
 489 
 490         // Not a comment.
 491         return false;
 492     }
 493 
 494     /**
 495      * Convert a regex token to a token object.
 496      *
 497      * @param start  Position in source content.
 498      * @param length Length of regex token.
 499      * @return Regex token object.
 500      */
 501     public RegexToken valueOfPattern(final int start, final int length) {
 502         // Save the current position.
 503         final int savePosition = position;
 504         // Reset to beginning of content.
 505         reset(start);
 506         // Buffer for recording characters.
 507         final StringBuilder sb = new StringBuilder(length);
 508 
 509         // Skip /.
 510         skip(1);
 511         boolean inBrackets = false;
 512         // Scan for closing /, stopping at end of line.
 513         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
 514             // Skip over escaped character.
 515             if (ch0 == '\\') {
 516                 sb.append(ch0);
 517                 sb.append(ch1);
 518                 skip(2);
 519             } else {
 520                 if (ch0 == '[') {
 521                     inBrackets = true;
 522                 } else if (ch0 == ']') {
 523                     inBrackets = false;
 524                 }
 525 
 526                 // Skip literal character.
 527                 sb.append(ch0);
 528                 skip(1);
 529             }
 530         }
 531 
 532         // Get pattern as string.
 533         final String regex = sb.toString();
 534 
 535         // Skip /.
 536         skip(1);
 537 
 538         // Options as string.
 539         final String options = source.getString(position, scanIdentifier());
 540 
 541         reset(savePosition);
 542 
 543         // Compile the pattern.
 544         return new RegexToken(regex, options);
 545     }
 546 
 547     /**
 548      * Return true if the given token can be the beginning of a literal.
 549      *
 550      * @param token a token
 551      * @return true if token can start a literal.
 552      */
 553     public boolean canStartLiteral(final TokenType token) {
 554         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
 555     }
 556 
 557     /**
 558      * interface to receive line information for multi-line literals.
 559      */
 560     protected interface LineInfoReceiver {
 561         /**
 562          * Receives line information
 563          * @param line last line number
 564          * @param linePosition position of last line
 565          */
 566         public void lineInfo(int line, int linePosition);
 567     }
 568 
 569     /**
 570      * Check whether the given token represents the beginning of a literal. If so scan
 571      * the literal and return <tt>true</tt>, otherwise return false.
 572      *
 573      * @param token the token.
 574      * @param startTokenType the token type.
 575      * @param lir LineInfoReceiver that receives line info for multi-line string literals.
 576      * @return True if a literal beginning with startToken was found and scanned.
 577      */
 578     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
 579         // Check if it can be a literal.
 580         if (!canStartLiteral(startTokenType)) {
 581             return false;
 582         }
 583         // We break on ambiguous tokens so if we already moved on it can't be a literal.
 584         if (stream.get(stream.last()) != token) {
 585             return false;
 586         }
 587         // Rewind to token start position
 588         reset(Token.descPosition(token));
 589 
 590         if (ch0 == '/') {
 591             return scanRegEx();
 592         } else if (ch0 == '<') {
 593             if (ch1 == '<') {
 594                 return scanHereString(lir);
 595             } else if (Character.isJavaIdentifierStart(ch1)) {
 596                 return scanXMLLiteral();
 597             }
 598         }
 599 
 600         return false;
 601     }
 602 
 603     /**
 604      * Scan over regex literal.
 605      *
 606      * @return True if a regex literal.
 607      */
 608     private boolean scanRegEx() {
 609         assert ch0 == '/';
 610         // Make sure it's not a comment.
 611         if (ch1 != '/' && ch1 != '*') {
 612             // Record beginning of literal.
 613             final int start = position;
 614             // Skip /.
 615             skip(1);
 616             boolean inBrackets = false;
 617 
 618             // Scan for closing /, stopping at end of line.
 619             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
 620                 // Skip over escaped character.
 621                 if (ch0 == '\\') {
 622                     skip(1);
 623                     if (isEOL(ch0)) {
 624                         reset(start);
 625                         return false;
 626                     }
 627                     skip(1);
 628                 } else {
 629                     if (ch0 == '[') {
 630                         inBrackets = true;
 631                     } else if (ch0 == ']') {
 632                         inBrackets = false;
 633                     }
 634 
 635                     // Skip literal character.
 636                     skip(1);
 637                 }
 638             }
 639 
 640             // If regex literal.
 641             if (ch0 == '/') {
 642                 // Skip /.
 643                 skip(1);
 644 
 645                 // Skip over options.
 646                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
 647                     skip(1);
 648                 }
 649 
 650                 // Add regex token.
 651                 add(REGEX, start);
 652                 // Regex literal detected.
 653                 return true;
 654             }
 655 
 656             // False start try again.
 657             reset(start);
 658         }
 659 
 660         // Regex literal not detected.
 661         return false;
 662     }
 663 
 664     /**
 665      * Convert a digit to a integer.  Can't use Character.digit since we are
 666      * restricted to ASCII by the spec.
 667      *
 668      * @param ch   Character to convert.
 669      * @param base Numeric base.
 670      *
 671      * @return The converted digit or -1 if invalid.
 672      */
 673     protected static int convertDigit(final char ch, final int base) {
 674         int digit;
 675 
 676         if ('0' <= ch && ch <= '9') {
 677             digit = ch - '0';
 678         } else if ('A' <= ch && ch <= 'Z') {
 679             digit = ch - 'A' + 10;
 680         } else if ('a' <= ch && ch <= 'z') {
 681             digit = ch - 'a' + 10;
 682         } else {
 683             return -1;
 684         }
 685 
 686         return digit < base ? digit : -1;
 687     }
 688 
 689 
 690     /**
 691      * Get the value of a hexadecimal numeric sequence.
 692      *
 693      * @param length Number of digits.
 694      * @param type   Type of token to report against.
 695      * @return Value of sequence or < 0 if no digits.
 696      */
 697     private int hexSequence(final int length, final TokenType type) {
 698         int value = 0;
 699 
 700         for (int i = 0; i < length; i++) {
 701             final int digit = convertDigit(ch0, 16);
 702 
 703             if (digit == -1) {
 704                 error(Lexer.message("invalid.hex"), type, position, limit);
 705                 return i == 0 ? -1 : value;
 706             }
 707 
 708             value = digit | value << 4;
 709             skip(1);
 710         }
 711 
 712         return value;
 713     }
 714 
 715     /**
 716      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
 717      *
 718      * @return Value of sequence.
 719      */
 720     private int octalSequence() {
 721         int value = 0;
 722 
 723         for (int i = 0; i < 3; i++) {
 724             final int digit = convertDigit(ch0, 8);
 725 
 726             if (digit == -1) {
 727                 break;
 728             }
 729             value = digit | value << 3;
 730             skip(1);
 731 
 732             if (i == 1 && value >= 32) {
 733                 break;
 734             }
 735         }
 736         return value;
 737     }
 738 
 739     /**
 740      * Convert a string to a JavaScript identifier.
 741      *
 742      * @param start  Position in source content.
 743      * @param length Length of token.
 744      * @return Ident string or null if an error.
 745      */
 746     private String valueOfIdent(final int start, final int length) throws RuntimeException {
 747         // Save the current position.
 748         final int savePosition = position;
 749         // End of scan.
 750         final int end = start + length;
 751         // Reset to beginning of content.
 752         reset(start);
 753         // Buffer for recording characters.
 754         final StringBuilder sb = new StringBuilder(length);
 755 
 756         // Scan until end of line or end of file.
 757         while (!atEOF() && position < end && !isEOL(ch0)) {
 758             // If escape character.
 759             if (ch0 == '\\' && ch1 == 'u') {
 760                 skip(2);
 761                 final int ch = hexSequence(4, TokenType.IDENT);
 762                 if (isWhitespace((char)ch)) {
 763                     return null;
 764                 }
 765                 if (ch < 0) {
 766                     sb.append('\\');
 767                     sb.append('u');
 768                 } else {
 769                     sb.append((char)ch);
 770                 }
 771             } else {
 772                 // Add regular character.
 773                 sb.append(ch0);
 774                 skip(1);
 775             }
 776         }
 777 
 778         // Restore position.
 779         reset(savePosition);
 780 
 781         return sb.toString();
 782     }
 783 
 784     /**
 785      * Scan over and identifier or keyword. Handles identifiers containing
 786      * encoded Unicode chars.
 787      *
 788      * Example:
 789      *
 790      * var \u0042 = 44;
 791      */
 792     private void scanIdentifierOrKeyword() {
 793         // Record beginning of identifier.
 794         final int start = position;
 795         // Scan identifier.
 796         final int length = scanIdentifier();
 797         // Check to see if it is a keyword.
 798         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
 799         // Add keyword or identifier token.
 800         add(type, start);
 801     }
 802 
 803     /**
 804      * Convert a string to a JavaScript string object.
 805      *
 806      * @param start  Position in source content.
 807      * @param length Length of token.
 808      * @return JavaScript string object.
 809      */
 810     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
 811         // Save the current position.
 812         final int savePosition = position;
 813         // Calculate the end position.
 814         final int end = start + length;
 815         // Reset to beginning of string.
 816         reset(start);
 817 
 818         // Buffer for recording characters.
 819         final StringBuilder sb = new StringBuilder(length);
 820 
 821         // Scan until end of string.
 822         while (position < end) {
 823             // If escape character.
 824             if (ch0 == '\\') {
 825                 skip(1);
 826 
 827                 final char next = ch0;
 828                 final int afterSlash = position;
 829 
 830                 skip(1);
 831 
 832                 // Special characters.
 833                 switch (next) {
 834                 case '0':
 835                 case '1':
 836                 case '2':
 837                 case '3':
 838                 case '4':
 839                 case '5':
 840                 case '6':
 841                 case '7': {
 842                     if (strict) {
 843                         // "\0" itself is allowed in strict mode. Only other 'real'
 844                         // octal escape sequences are not allowed (eg. "\02", "\31").
 845                         // See section 7.8.4 String literals production EscapeSequence
 846                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
 847                             error(Lexer.message("strict.no.octal"), STRING, position, limit);
 848                         }
 849                     }
 850                     reset(afterSlash);
 851                     // Octal sequence.
 852                     final int ch = octalSequence();
 853 
 854                     if (ch < 0) {
 855                         sb.append('\\');
 856                         sb.append('x');
 857                     } else {
 858                         sb.append((char)ch);
 859                     }
 860                     break;
 861                 }
 862                 case 'n':
 863                     sb.append('\n');
 864                     break;
 865                 case 't':
 866                     sb.append('\t');
 867                     break;
 868                 case 'b':
 869                     sb.append('\b');
 870                     break;
 871                 case 'f':
 872                     sb.append('\f');
 873                     break;
 874                 case 'r':
 875                     sb.append('\r');
 876                     break;
 877                 case '\'':
 878                     sb.append('\'');
 879                     break;
 880                 case '\"':
 881                     sb.append('\"');
 882                     break;
 883                 case '\\':
 884                     sb.append('\\');
 885                     break;
 886                 case '\r': // CR | CRLF
 887                     if (ch0 == '\n') {
 888                         skip(1);
 889                     }
 890                     // fall through
 891                 case '\n': // LF
 892                 case '\u2028': // LS
 893                 case '\u2029': // PS
 894                     // continue on the next line, slash-return continues string
 895                     // literal
 896                     break;
 897                 case 'x': {
 898                     // Hex sequence.
 899                     final int ch = hexSequence(2, STRING);
 900 
 901                     if (ch < 0) {
 902                         sb.append('\\');
 903                         sb.append('x');
 904                     } else {
 905                         sb.append((char)ch);
 906                     }
 907                 }
 908                     break;
 909                 case 'u': {
 910                     // Unicode sequence.
 911                     final int ch = hexSequence(4, STRING);
 912 
 913                     if (ch < 0) {
 914                         sb.append('\\');
 915                         sb.append('u');
 916                     } else {
 917                         sb.append((char)ch);
 918                     }
 919                 }
 920                     break;
 921                 case 'v':
 922                     sb.append('\u000B');
 923                     break;
 924                 // All other characters.
 925                 default:
 926                     sb.append(next);
 927                     break;
 928                 }
 929             } else {
 930                 // Add regular character.
 931                 sb.append(ch0);
 932                 skip(1);
 933             }
 934         }
 935 
 936         // Restore position.
 937         reset(savePosition);
 938 
 939         return sb.toString();
 940     }
 941 
 942     /**
 943      * Scan over a string literal.
 944      * @param add true if we nare not just scanning but should actually modify the token stream
 945      */
 946     protected void scanString(final boolean add) {
 947         // Type of string.
 948         TokenType type = STRING;
 949         // Record starting quote.
 950         final char quote = ch0;
 951         // Skip over quote.
 952         skip(1);
 953 
 954         // Record beginning of string content.
 955         final State stringState = saveState();
 956 
 957         // Scan until close quote or end of line.
 958         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
 959             // Skip over escaped character.
 960             if (ch0 == '\\') {
 961                 type = ESCSTRING;
 962                 skip(1);
 963                 if (! isEscapeCharacter(ch0)) {
 964                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);
 965                 }
 966                 if (isEOL(ch0)) {
 967                     // Multiline string literal
 968                     skipEOL(false);
 969                     continue;
 970                 }
 971             }
 972             // Skip literal character.
 973             skip(1);
 974         }
 975 
 976         // If close quote.
 977         if (ch0 == quote) {
 978             // Skip close quote.
 979             skip(1);
 980         } else {
 981             error(Lexer.message("missing.close.quote"), STRING, position, limit);
 982         }
 983 
 984         // If not just scanning.
 985         if (add) {
 986             // Record end of string.
 987             stringState.setLimit(position - 1);
 988 
 989             if (scripting && !stringState.isEmpty()) {
 990                 switch (quote) {
 991                 case '`':
 992                     // Mark the beginning of an exec string.
 993                     add(EXECSTRING, stringState.position, stringState.limit);
 994                     // Frame edit string with left brace.
 995                     add(LBRACE, stringState.position, stringState.position);
 996                     // Process edit string.
 997                     editString(type, stringState);
 998                     // Frame edit string with right brace.
 999                     add(RBRACE, stringState.limit, stringState.limit);
1000                     break;
1001                 case '"':
1002                     // Only edit double quoted strings.
1003                     editString(type, stringState);
1004                     break;
1005                 case '\'':
1006                     // Add string token without editing.
1007                     add(type, stringState.position, stringState.limit);
1008                     break;
1009                 default:
1010                     break;
1011                 }
1012             } else {
1013                 /// Add string token without editing.
1014                 add(type, stringState.position, stringState.limit);
1015             }
1016         }
1017     }
1018 
1019     /**
1020      * Is the given character a valid escape char after "\" ?
1021      *
1022      * @param ch character to be checked
1023      * @return if the given character is valid after "\"
1024      */
1025     protected boolean isEscapeCharacter(final char ch) {
1026         return true;
1027     }
1028 
1029     /**
1030      * Convert string to number.
1031      *
1032      * @param valueString  String to convert.
1033      * @param radix        Numeric base.
1034      * @return Converted number.
1035      */
1036     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1037         try {
1038             final long value = Long.parseLong(valueString, radix);
1039             if(value >= MIN_INT_L && value <= MAX_INT_L) {
1040                 return Integer.valueOf((int)value);
1041             }
1042             return Long.valueOf(value);
1043         } catch (final NumberFormatException e) {
1044             if (radix == 10) {
1045                 return Double.valueOf(valueString);
1046             }
1047 
1048             double value = 0.0;
1049 
1050             for (int i = 0; i < valueString.length(); i++) {
1051                 final char ch = valueString.charAt(i);
1052                 // Preverified, should always be a valid digit.
1053                 final int digit = convertDigit(ch, radix);
1054                 value *= radix;
1055                 value += digit;
1056             }
1057 
1058             return value;
1059         }
1060     }
1061 
1062     /**
1063      * Scan a number.
1064      */
1065     protected void scanNumber() {
1066         // Record beginning of number.
1067         final int start = position;
1068         // Assume value is a decimal.
1069         TokenType type = DECIMAL;
1070 
1071         // First digit of number.
1072         int digit = convertDigit(ch0, 10);
1073 
1074         // If number begins with 0x.
1075         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1076             // Skip over 0xN.
1077             skip(3);
1078             // Skip over remaining digits.
1079             while (convertDigit(ch0, 16) != -1) {
1080                 skip(1);
1081             }
1082 
1083             type = HEXADECIMAL;
1084         } else {
1085             // Check for possible octal constant.
1086             boolean octal = digit == 0;
1087             // Skip first digit if not leading '.'.
1088             if (digit != -1) {
1089                 skip(1);
1090             }
1091 
1092             // Skip remaining digits.
1093             while ((digit = convertDigit(ch0, 10)) != -1) {
1094                 // Check octal only digits.
1095                 octal = octal && digit < 8;
1096                 // Skip digit.
1097                 skip(1);
1098             }
1099 
1100             if (octal && position - start > 1) {
1101                 type = OCTAL;
1102             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1103                 // Must be a double.
1104                 if (ch0 == '.') {
1105                     // Skip period.
1106                     skip(1);
1107                     // Skip mantissa.
1108                     while (convertDigit(ch0, 10) != -1) {
1109                         skip(1);
1110                     }
1111                 }
1112 
1113                 // Detect exponent.
1114                 if (ch0 == 'E' || ch0 == 'e') {
1115                     // Skip E.
1116                     skip(1);
1117                     // Detect and skip exponent sign.
1118                     if (ch0 == '+' || ch0 == '-') {
1119                         skip(1);
1120                     }
1121                     // Skip exponent.
1122                     while (convertDigit(ch0, 10) != -1) {
1123                         skip(1);
1124                     }
1125                 }
1126 
1127                 type = FLOATING;
1128             }
1129         }
1130 
1131         if (Character.isJavaIdentifierStart(ch0)) {
1132             error(Lexer.message("missing.space.after.number"), type, position, 1);
1133         }
1134 
1135         // Add number token.
1136         add(type, start);
1137     }
1138 
1139     /**
1140      * Convert a regex token to a token object.
1141      *
1142      * @param start  Position in source content.
1143      * @param length Length of regex token.
1144      * @return Regex token object.
1145      */
1146     XMLToken valueOfXML(final int start, final int length) {
1147         return new XMLToken(source.getString(start, length));
1148     }
1149 
1150     /**
1151      * Scan over a XML token.
1152      *
1153      * @return TRUE if is an XML literal.
1154      */
1155     private boolean scanXMLLiteral() {
1156         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1157         if (XML_LITERALS) {
1158             // Record beginning of xml expression.
1159             final int start = position;
1160 
1161             int openCount = 0;
1162 
1163             do {
1164                 if (ch0 == '<') {
1165                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1166                         skip(3);
1167                         openCount--;
1168                     } else if (Character.isJavaIdentifierStart(ch1)) {
1169                         skip(2);
1170                         openCount++;
1171                     } else if (ch1 == '?') {
1172                         skip(2);
1173                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1174                         skip(4);
1175                     } else {
1176                         reset(start);
1177                         return false;
1178                     }
1179 
1180                     while (!atEOF() && ch0 != '>') {
1181                         if (ch0 == '/' && ch1 == '>') {
1182                             openCount--;
1183                             skip(1);
1184                             break;
1185                         } else if (ch0 == '\"' || ch0 == '\'') {
1186                             scanString(false);
1187                         } else {
1188                             skip(1);
1189                         }
1190                     }
1191 
1192                     if (ch0 != '>') {
1193                         reset(start);
1194                         return false;
1195                     }
1196 
1197                     skip(1);
1198                 } else if (atEOF()) {
1199                     reset(start);
1200                     return false;
1201                 } else {
1202                     skip(1);
1203                 }
1204             } while (openCount > 0);
1205 
1206             add(XML, start);
1207             return true;
1208         }
1209 
1210         return false;
1211     }
1212 
1213     /**
1214      * Scan over identifier characters.
1215      *
1216      * @return Length of identifier or zero if none found.
1217      */
1218     private int scanIdentifier() {
1219         final int start = position;
1220 
1221         // Make sure first character is valid start character.
1222         if (ch0 == '\\' && ch1 == 'u') {
1223             skip(2);
1224             final int ch = hexSequence(4, TokenType.IDENT);
1225 
1226             if (!Character.isJavaIdentifierStart(ch)) {
1227                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1228             }
1229         } else if (!Character.isJavaIdentifierStart(ch0)) {
1230             // Not an identifier.
1231             return 0;
1232         }
1233 
1234         // Make sure remaining characters are valid part characters.
1235         while (!atEOF()) {
1236             if (ch0 == '\\' && ch1 == 'u') {
1237                 skip(2);
1238                 final int ch = hexSequence(4, TokenType.IDENT);
1239 
1240                 if (!Character.isJavaIdentifierPart(ch)) {
1241                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1242                 }
1243             } else if (Character.isJavaIdentifierPart(ch0)) {
1244                 skip(1);
1245             } else {
1246                 break;
1247             }
1248         }
1249 
1250         // Length of identifier sequence.
1251         return position - start;
1252     }
1253 
1254     /**
1255      * Compare two identifiers (in content) for equality.
1256      *
1257      * @param aStart  Start of first identifier.
1258      * @param aLength Length of first identifier.
1259      * @param bStart  Start of second identifier.
1260      * @param bLength Length of second identifier.
1261      * @return True if equal.
1262      */
1263     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1264         if (aLength == bLength) {
1265             for (int i = 0; i < aLength; i++) {
1266                 if (content[aStart + i] != content[bStart + i]) {
1267                     return false;
1268                 }
1269             }
1270 
1271             return true;
1272         }
1273 
1274         return false;
1275     }
1276 
1277     /**
1278      * Detect if a line starts with a marker identifier.
1279      *
1280      * @param identStart  Start of identifier.
1281      * @param identLength Length of identifier.
1282      * @return True if detected.
1283      */
1284     private boolean hasHereMarker(final int identStart, final int identLength) {
1285         // Skip any whitespace.
1286         skipWhitespace(false);
1287 
1288         return identifierEqual(identStart, identLength, position, scanIdentifier());
1289     }
1290 
1291     /**
1292      * Lexer to service edit strings.
1293      */
1294     private static class EditStringLexer extends Lexer {
1295         /** Type of string literals to emit. */
1296         final TokenType stringType;
1297 
1298         /*
1299          * Constructor.
1300          */
1301 
1302         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1303             super(lexer, stringState);
1304 
1305             this.stringType = stringType;
1306         }
1307 
1308         /**
1309          * Lexify the contents of the string.
1310          */
1311         @Override
1312         public void lexify() {
1313             // Record start of string position.
1314             int stringStart = position;
1315             // Indicate that the priming first string has not been emitted.
1316             boolean primed = false;
1317 
1318             while (true) {
1319                 // Detect end of content.
1320                 if (atEOF()) {
1321                     break;
1322                 }
1323 
1324                 // Honour escapes (should be well formed.)
1325                 if (ch0 == '\\' && stringType == ESCSTRING) {
1326                     skip(2);
1327 
1328                     continue;
1329                 }
1330 
1331                 // If start of expression.
1332                 if (ch0 == '$' && ch1 == '{') {
1333                     if (!primed || stringStart != position) {
1334                         if (primed) {
1335                             add(ADD, stringStart, stringStart + 1);
1336                         }
1337 
1338                         add(stringType, stringStart, position);
1339                         primed = true;
1340                     }
1341 
1342                     // Skip ${
1343                     skip(2);
1344 
1345                     // Save expression state.
1346                     final State expressionState = saveState();
1347 
1348                     // Start with one open brace.
1349                     int braceCount = 1;
1350 
1351                     // Scan for the rest of the string.
1352                     while (!atEOF()) {
1353                         // If closing brace.
1354                         if (ch0 == '}') {
1355                             // Break only only if matching brace.
1356                             if (--braceCount == 0) {
1357                                 break;
1358                             }
1359                         } else if (ch0 == '{') {
1360                             // Bump up the brace count.
1361                             braceCount++;
1362                         }
1363 
1364                         // Skip to next character.
1365                         skip(1);
1366                     }
1367 
1368                     // If braces don't match then report an error.
1369                     if (braceCount != 0) {
1370                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1371                     }
1372 
1373                     // Mark end of expression.
1374                     expressionState.setLimit(position);
1375                     // Skip closing brace.
1376                     skip(1);
1377 
1378                     // Start next string.
1379                     stringStart = position;
1380 
1381                     // Concatenate expression.
1382                     add(ADD, expressionState.position, expressionState.position + 1);
1383                     add(LPAREN, expressionState.position, expressionState.position + 1);
1384 
1385                     // Scan expression.
1386                     final Lexer lexer = new Lexer(this, expressionState);
1387                     lexer.lexify();
1388 
1389                     // Close out expression parenthesis.
1390                     add(RPAREN, position - 1, position);
1391 
1392                     continue;
1393                 }
1394 
1395                 // Next character in string.
1396                 skip(1);
1397             }
1398 
1399             // If there is any unemitted string portion.
1400             if (stringStart != limit) {
1401                 // Concatenate remaining string.
1402                 if (primed) {
1403                     add(ADD, stringStart, 1);
1404                 }
1405 
1406                 add(stringType, stringStart, limit);
1407             }
1408         }
1409 
1410     }
1411 
1412     /**
1413      * Edit string for nested expressions.
1414      *
1415      * @param stringType  Type of string literals to emit.
1416      * @param stringState State of lexer at start of string.
1417      */
1418     private void editString(final TokenType stringType, final State stringState) {
1419         // Use special lexer to scan string.
1420         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1421         lexer.lexify();
1422 
1423         // Need to keep lexer informed.
1424         last = stringType;
1425     }
1426 
1427     /**
1428      * Scan over a here string.
1429      *
1430      * @return TRUE if is a here string.
1431      */
1432     private boolean scanHereString(final LineInfoReceiver lir) {
1433         assert ch0 == '<' && ch1 == '<';
1434         if (scripting) {
1435             // Record beginning of here string.
1436             final State saved = saveState();
1437 
1438             // << or <<<
1439             final boolean excludeLastEOL = ch2 != '<';
1440 
1441             if (excludeLastEOL) {
1442                 skip(2);
1443             } else {
1444                 skip(3);
1445             }
1446 
1447             // Scan identifier.
1448             final int identStart = position;
1449             final int identLength = scanIdentifier();
1450 
1451             // Check for identifier.
1452             if (identLength == 0) {
1453                 // Treat as shift.
1454                 restoreState(saved);
1455 
1456                 return false;
1457             }
1458 
1459             // Record rest of line.
1460             final State restState = saveState();
1461             // keep line number updated
1462             int lastLine = line;
1463 
1464             skipLine(false);
1465             lastLine++;
1466             int lastLinePosition = position;
1467             restState.setLimit(position);
1468 
1469             // Record beginning of string.
1470             final State stringState = saveState();
1471             int stringEnd = position;
1472 
1473             // Hunt down marker.
1474             while (!atEOF()) {
1475                 // Skip any whitespace.
1476                 skipWhitespace(false);
1477 
1478                 if (hasHereMarker(identStart, identLength)) {
1479                     break;
1480                 }
1481 
1482                 skipLine(false);
1483                 lastLine++;
1484                 lastLinePosition = position;
1485                 stringEnd = position;
1486             }
1487 
1488             // notify last line information
1489             lir.lineInfo(lastLine, lastLinePosition);
1490 
1491             // Record end of string.
1492             stringState.setLimit(stringEnd);
1493 
1494             // If marker is missing.
1495             if (stringState.isEmpty() || atEOF()) {
1496                 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1497                 restoreState(saved);
1498 
1499                 return false;
1500             }
1501 
1502             // Remove last end of line if specified.
1503             if (excludeLastEOL) {
1504                 // Handles \n.
1505                 if (content[stringEnd - 1] == '\n') {
1506                     stringEnd--;
1507                 }
1508 
1509                 // Handles \r and \r\n.
1510                 if (content[stringEnd - 1] == '\r') {
1511                     stringEnd--;
1512                 }
1513 
1514                 // Update end of string.
1515                 stringState.setLimit(stringEnd);
1516             }
1517 
1518             // Edit string if appropriate.
1519             if (scripting && !stringState.isEmpty()) {
1520                 editString(STRING, stringState);
1521             } else {
1522                 // Add here string.
1523                 add(STRING, stringState.position, stringState.limit);
1524             }
1525 
1526             // Scan rest of original line.
1527             final Lexer restLexer = new Lexer(this, restState);
1528 
1529             restLexer.lexify();
1530 
1531             return true;
1532         }
1533 
1534         return false;
1535     }
1536 
1537     /**
1538      * Breaks source content down into lex units, adding tokens to the token
1539      * stream. The routine scans until the stream buffer is full. Can be called
1540      * repeatedly until EOF is detected.
1541      */
1542     public void lexify() {
1543         while (!stream.isFull() || nested) {
1544             // Skip over whitespace.
1545             skipWhitespace(true);
1546 
1547             // Detect end of file.
1548             if (atEOF()) {
1549                 if (!nested) {
1550                     // Add an EOF token at the end.
1551                     add(EOF, position);
1552                 }
1553 
1554                 break;
1555             }
1556 
1557             // Check for comments. Note that we don't scan for regexp and other literals here as
1558             // we may not have enough context to distinguish them from similar looking operators.
1559             // Instead we break on ambiguous operators below and let the parser decide.
1560             if (ch0 == '/' && skipComments()) {
1561                 continue;
1562             }
1563 
1564             if (scripting && ch0 == '#' && skipComments()) {
1565                 continue;
1566             }
1567 
1568             // TokenType for lookup of delimiter or operator.
1569             TokenType type;
1570 
1571             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1572                 // '.' followed by digit.
1573                 // Scan and add a number.
1574                 scanNumber();
1575             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1576                 // Get the number of characters in the token.
1577                 final int typeLength = type.getLength();
1578                 // Skip that many characters.
1579                 skip(typeLength);
1580                 // Add operator token.
1581                 add(type, position - typeLength);
1582                 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1583                 // We break to let the parser decide what it is.
1584                 if (canStartLiteral(type)) {
1585                     break;
1586                 }
1587             } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1588                 // Scan and add identifier or keyword.
1589                 scanIdentifierOrKeyword();
1590             } else if (isStringDelimiter(ch0)) {
1591                 // Scan and add a string.
1592                 scanString(true);
1593             } else if (Character.isDigit(ch0)) {
1594                 // Scan and add a number.
1595                 scanNumber();
1596             } else {
1597                 // Don't recognize this character.
1598                 skip(1);
1599                 add(ERROR, position - 1);
1600             }
1601         }
1602     }
1603 
1604     /**
1605      * Return value of token given its token descriptor.
1606      *
1607      * @param token  Token descriptor.
1608      * @return JavaScript value.
1609      */
1610     Object getValueOf(final long token, final boolean strict) {
1611         final int start = Token.descPosition(token);
1612         final int len = Token.descLength(token);
1613 
1614         switch (Token.descType(token)) {
1615         case DECIMAL:
1616             return Lexer.valueOf(source.getString(start, len), 10); // number
1617         case OCTAL:
1618             return Lexer.valueOf(source.getString(start, len), 8); // number
1619         case HEXADECIMAL:
1620             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1621         case FLOATING:
1622             return Double.valueOf(source.getString(start, len)); // number
1623         case STRING:
1624             return source.getString(start, len); // String
1625         case ESCSTRING:
1626             return valueOfString(start, len, strict); // String
1627         case IDENT:
1628             return valueOfIdent(start, len); // String
1629         case REGEX:
1630             return valueOfPattern(start, len); // RegexToken::LexerToken
1631         case XML:
1632             return valueOfXML(start, len); // XMLToken::LexerToken
1633         case DIRECTIVE_COMMENT:
1634             return source.getString(start, len);
1635         default:
1636             break;
1637         }
1638 
1639         return null;
1640     }
1641 
1642     /**
1643      * Get the correctly localized error message for a given message id format arguments
1644      * @param msgId message id
1645      * @param args  format arguments
1646      * @return message
1647      */
1648     protected static String message(final String msgId, final String... args) {
1649         return ECMAErrors.getMessage("lexer.error." + msgId, args);
1650     }
1651 
1652     /**
1653      * Generate a runtime exception
1654      *
1655      * @param message       error message
1656      * @param type          token type
1657      * @param start         start position of lexed error
1658      * @param length        length of lexed error
1659      * @throws ParserException  unconditionally
1660      */
1661     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1662         final long token     = Token.toDesc(type, start, length);
1663         final int  pos       = Token.descPosition(token);
1664         final int  lineNum   = source.getLine(pos);
1665         final int  columnNum = source.getColumn(pos);
1666         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1667         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1668     }
1669 
1670     /**
1671      * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1672      * This is the abstract superclass
1673      */
1674     public static abstract class LexerToken {
1675         private final String expression;
1676 
1677         /**
1678          * Constructor
1679          * @param expression token expression
1680          */
1681         protected LexerToken(final String expression) {
1682             this.expression = expression;
1683         }
1684 
1685         /**
1686          * Get the expression
1687          * @return expression
1688          */
1689         public String getExpression() {
1690             return expression;
1691         }
1692     }
1693 
1694     /**
1695      * Temporary container for regular expressions.
1696      */
1697     public static class RegexToken extends LexerToken {
1698         /** Options. */
1699         private final String options;
1700 
1701         /**
1702          * Constructor.
1703          *
1704          * @param expression  regexp expression
1705          * @param options     regexp options
1706          */
1707         public RegexToken(final String expression, final String options) {
1708             super(expression);
1709             this.options = options;
1710         }
1711 
1712         /**
1713          * Get regexp options
1714          * @return options
1715          */
1716         public String getOptions() {
1717             return options;
1718         }
1719 
1720         @Override
1721         public String toString() {
1722             return '/' + getExpression() + '/' + options;
1723         }
1724     }
1725 
1726     /**
1727      * Temporary container for XML expression.
1728      */
1729     public static class XMLToken extends LexerToken {
1730 
1731         /**
1732          * Constructor.
1733          *
1734          * @param expression  XML expression
1735          */
1736         public XMLToken(final String expression) {
1737             super(expression);
1738         }
1739     }
1740 }