New src/jdk/nashorn/internal/parser/Lexer.java

   1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.ADD;
  29 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
  30 import static jdk.nashorn.internal.parser.TokenType.EOF;
  31 import static jdk.nashorn.internal.parser.TokenType.EOL;
  32 import static jdk.nashorn.internal.parser.TokenType.ERROR;
  33 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  34 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
  35 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
  36 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
  37 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
  38 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
  39 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
  40 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  41 import static jdk.nashorn.internal.parser.TokenType.REGEX;
  42 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
  43 import static jdk.nashorn.internal.parser.TokenType.STRING;
  44 import static jdk.nashorn.internal.parser.TokenType.XML;
  45 
  46 import jdk.nashorn.internal.runtime.ECMAErrors;
  47 import jdk.nashorn.internal.runtime.ErrorManager;
  48 import jdk.nashorn.internal.runtime.JSErrorType;
  49 import jdk.nashorn.internal.runtime.JSType;
  50 import jdk.nashorn.internal.runtime.ParserException;
  51 import jdk.nashorn.internal.runtime.Source;
  52 import jdk.nashorn.internal.runtime.options.Options;
  53 
  54 /**
  55  * Responsible for converting source content into a stream of tokens.
  56  *
  57  */
  58 @SuppressWarnings("fallthrough")
  59 public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} widened to a long. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} widened to a long. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /**
     * Value of the "nashorn.lexer.xmlliterals" boolean option. When set, a
     * token starting with '<' may begin a literal even outside scripting
     * mode (see {@link #canStartLiteral(TokenType)}).
     */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens; every emitted token is put into it. */
    private final TokenStream stream;

    /**
     * True if here and edit strings are supported (scripting mode). Also
     * enables '`' as a string delimiter and '#' line comments.
     */
    private final boolean scripting;

    /** True if a nested scan. (scan to completion, no EOF.) */
    private final boolean nested;

    /**
     * Pending new line number and position: the line number of the EOL
     * token that has been seen but not yet written to the stream, or -1
     * once it has been written.
     */
    private int pendingLine;

    /** Position of last EOL + 1; emitted as the position of the pending EOL token. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** All JavaScript whitespace characters, line terminators included. */
    private static final String JAVASCRIPT_WHITESPACE;
    /** The JavaScript line terminator characters only. */
    private static final String JAVASCRIPT_WHITESPACE_EOL;
    /** The JavaScript whitespace characters, each spelled as a unicode escape for use in regular expressions. */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP;

    /** All JSON whitespace characters: line feed, carriage return, tab and space. */
    private static final String JSON_WHITESPACE;
    /** The JSON end of line characters: line feed and carriage return. */
    private static final String JSON_WHITESPACE_EOL;
  92 
  93     static String unicodeEscape(final char ch) {
  94         final StringBuilder sb = new StringBuilder();
  95 
  96         sb.append("\\u");
  97 
  98         final String hex = Integer.toHexString(ch);
  99         for (int i = hex.length(); i < 4; i++) {
 100             sb.append('0');
 101         }
 102         sb.append(hex);
 103 
 104         return sb.toString();
 105     }
 106 
 107     static {
 108         final StringBuilder ws       = new StringBuilder();
 109         final StringBuilder wsEOL    = new StringBuilder();
 110         final StringBuilder wsRegExp = new StringBuilder();
 111         final StringBuilder jsonWs   = new StringBuilder();
 112 
 113         jsonWs.append((char)0x000a);
 114         jsonWs.append((char)0x000d);
 115         JSON_WHITESPACE_EOL = jsonWs.toString();
 116 
 117         jsonWs.append((char)0x0009);
 118         jsonWs.append((char)0x0020);
 119         JSON_WHITESPACE = jsonWs.toString();
 120 
 121         for (int i = 0; i <= 0xffff; i++) {
 122            switch (i) {
 123             case 0x000a: // line feed
 124             case 0x000d: // carriage return (ctrl-m)
 125             case 0x2028: // line separator
 126             case 0x2029: // paragraph separator
 127                 wsEOL.append((char)i);
 128             case 0x0009: // tab
 129             case 0x0020: // ASCII space
 130             case 0x000b: // tabulation line
 131             case 0x000c: // ff (ctrl-l)
 132             case 0x00a0: // Latin-1 space
 133             case 0x1680: // Ogham space mark
 134             case 0x180e: // separator, Mongolian vowel
 135             case 0x2000: // en quad
 136             case 0x2001: // em quad
 137             case 0x2002: // en space
 138             case 0x2003: // em space
 139             case 0x2004: // three-per-em space
 140             case 0x2005: // four-per-em space
 141             case 0x2006: // six-per-em space
 142             case 0x2007: // figure space
 143             case 0x2008: // punctuation space
 144             case 0x2009: // thin space
 145             case 0x200a: // hair space
 146             case 0x202f: // narrow no-break space
 147             case 0x205f: // medium mathematical space
 148             case 0x3000: // ideographic space
 149             case 0xfeff: // byte order mark
 150                 ws.append((char)i);
 151 
 152                 wsRegExp.append(Lexer.unicodeEscape((char)i));
 153                 break;
 154 
 155             default:
 156                 break;
 157             }
 158         }
 159 
 160         JAVASCRIPT_WHITESPACE = ws.toString();
 161         JAVASCRIPT_WHITESPACE_EOL = wsEOL.toString();
 162         JAVASCRIPT_WHITESPACE_IN_REGEXP = wsRegExp.toString();
 163 
 164     }
 165 
    /**
     * Constructor. Creates a lexer with scripting mode switched off by
     * delegating to the three argument constructor.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false);
    }
 175 
 176     /**
 177      * Constructor
 178      *
 179      * @param source    the source
 180      * @param stream    the token stream to lex
 181      * @param scripting are we in scripting mode
 182      */
 183     public Lexer(final Source source, final TokenStream stream, final boolean scripting) {
 184         super(source.getContent(), 1, 0, source.getLength());
 185 
 186         this.source      = source;
 187         this.stream      = stream;
 188         this.scripting   = scripting;
 189         this.nested      = false;
 190         this.pendingLine = 1;
 191         this.last        = EOL;
 192     }
 193 
 194     private Lexer(final Lexer lexer, final State state) {
 195         super(lexer, state);
 196 
 197         source = lexer.source;
 198         stream = lexer.stream;
 199         scripting = lexer.scripting;
 200         nested = true;
 201 
 202         pendingLine = state.pendingLine;
 203         linePosition = state.linePosition;
 204         last = EOL;
 205     }
 206 
 207     static class State extends Scanner.State {
 208         /** Pending new line number and position. */
 209         public final int pendingLine;
 210 
 211         /** Position of last EOL + 1. */
 212         public final int linePosition;
 213 
 214         /** Type of last token added. */
 215         public final TokenType last;
 216 
 217         /*
 218          * Constructor.
 219          */
 220 
 221         State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
 222             super(position, limit, line);
 223 
 224             this.pendingLine = pendingLine;
 225             this.linePosition = linePosition;
 226             this.last = last;
 227         }
 228     }
 229 
    /**
     * Save the state of the scan: the current position, limit and line
     * together with the pending end of line bookkeeping and the type of
     * the last token added.
     *
     * @return Captured state.
     */
    @Override
    State saveState() {
        return new State(position, limit, line, pendingLine, linePosition, last);
    }
 239 
 240     /**
 241      * Restore the state of the scan.
 242      *
 243      * @param state
 244      *            Captured state.
 245      */
 246     void restoreState(final State state) {
 247         super.restoreState(state);
 248 
 249         pendingLine = state.pendingLine;
 250         linePosition = state.linePosition;
 251         last = state.last;
 252     }
 253 
 254     /**
 255      * Add a new token to the stream.
 256      *
 257      * @param type
 258      *            Token type.
 259      * @param start
 260      *            Start position.
 261      * @param end
 262      *            End position.
 263      */
 264     protected void add(final TokenType type, final int start, final int end) {
 265         // Record last token.
 266         last = type;
 267 
 268         // Only emit the last EOL in a cluster.
 269         if (type == EOL) {
 270             pendingLine = end;
 271             linePosition = start;
 272         } else {
 273             // Write any pending EOL to stream.
 274             if (pendingLine != -1) {
 275                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
 276                 pendingLine = -1;
 277             }
 278 
 279             // Write token to stream.
 280             stream.put(Token.toDesc(type, start, end - start));
 281         }
 282     }
 283 
    /**
     * Add a new token to the stream, ending at the current scan position.
     *
     * @param type
     *            Token type.
     * @param start
     *            Start position.
     */
    protected void add(final TokenType type, final int start) {
        add(type, start, position);
    }
 295 
    /**
     * Return the String of valid whitespace characters for regular
     * expressions in JavaScript. Each character is spelled as a four digit
     * unicode escape, so the string can be embedded in a pattern.
     * @return regexp whitespace string
     */
    public static String getWhitespaceRegExp() {
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
 304 
 305     /**
 306      * Skip end of line.
 307      *
 308      * @param addEOL true if EOL token should be recorded.
 309      */
 310     private void skipEOL(final boolean addEOL) {
 311 
 312         if (ch0 == '\r') { // detect \r\n pattern
 313             skip(1);
 314             if (ch0 == '\n') {
 315                 skip(1);
 316             }
 317         } else { // all other space, ch0 is guaranteed to be EOL or \0
 318             skip(1);
 319         }
 320 
 321         // bump up line count
 322         line++;
 323 
 324         if (addEOL) {
 325             // Add an EOL token.
 326             add(EOL, position, line);
 327         }
 328     }
 329 
 330     /**
 331      * Skip over rest of line including end of line.
 332      *
 333      * @param addEOL true if EOL token should be recorded.
 334      */
 335     private void skipLine(final boolean addEOL) {
 336         // Ignore characters.
 337         while (!isEOL(ch0) && !atEOF()) {
 338             skip(1);
 339         }
 340         // Skip over end of line.
 341         skipEOL(addEOL);
 342     }
 343 
 344     /**
 345      * Test whether a char is valid JavaScript whitespace
 346      * @param ch a char
 347      * @return true if valid JavaScript whitespace
 348      */
 349     public static boolean isJSWhitespace(final char ch) {
 350         return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
 351     }
 352 
 353     /**
 354      * Test whether a char is valid JavaScript end of line
 355      * @param ch a char
 356      * @return true if valid JavaScript end of line
 357      */
 358     public static boolean isJSEOL(final char ch) {
 359         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
 360     }
 361 
 362     /**
 363      * Test whether a char is valid JSON whitespace
 364      * @param ch a char
 365      * @return true if valid JSON whitespace
 366      */
 367     public static boolean isJsonWhitespace(final char ch) {
 368         return JSON_WHITESPACE.indexOf(ch) != -1;
 369     }
 370 
 371     /**
 372      * Test whether a char is valid JSON end of line
 373      * @param ch a char
 374      * @return true if valid JSON end of line
 375      */
 376     public static boolean isJsonEOL(final char ch) {
 377         return JSON_WHITESPACE_EOL.indexOf(ch) != -1;
 378     }
 379 
 380     /**
 381      * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
 382      * strings ('`') in scripting mode.
 383      * @param ch a char
 384      * @return true if string delimiter
 385      */
 386     protected boolean isStringDelimiter(final char ch) {
 387         return ch == '\'' || ch == '"' || (scripting && ch == '`');
 388     }
 389 
 390     /**
 391      * Test whether a char is valid JavaScript whitespace
 392      * @param ch a char
 393      * @return true if valid JavaScript whitespace
 394      */
 395     protected boolean isWhitespace(final char ch) {
 396         return Lexer.isJSWhitespace(ch);
 397     }
 398 
 399     /**
 400      * Test whether a char is valid JavaScript end of line
 401      * @param ch a char
 402      * @return true if valid JavaScript end of line
 403      */
 404     protected boolean isEOL(final char ch) {
 405         return Lexer.isJSEOL(ch);
 406     }
 407 
 408     /**
 409      * Skip over whitespace and detect end of line, adding EOL tokens if
 410      * encountered.
 411      *
 412      * @param addEOL true if EOL tokens should be recorded.
 413      */
 414     private void skipWhitespace(final boolean addEOL) {
 415         while (isWhitespace(ch0)) {
 416             if (isEOL(ch0)) {
 417                 skipEOL(addEOL);
 418             } else {
 419                 skip(1);
 420             }
 421         }
 422     }
 423 
 424     /**
 425      * Skip over comments.
 426      *
 427      * @return True if a comment.
 428      */
 429     protected boolean skipComments() {
 430         if (ch0 == '/') {
 431             // Is it a // comment.
 432             if (ch1 == '/') {
 433                 // Skip over //.
 434                 skip(2);
 435                 // Scan for EOL.
 436                 while (!atEOF() && !isEOL(ch0)) {
 437                     skip(1);
 438                 }
 439                 // Did detect a comment.
 440                 return true;
 441             } else if (ch1 == '*') {
 442                 // Record beginning of comment.
 443                 final int start = position;
 444                 // Skip over /*.
 445                 skip(2);
 446                 // Scan for */.
 447                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
 448                     // If end of line handle else skip character.
 449                     if (isEOL(ch0)) {
 450                         skipEOL(true);
 451                     } else {
 452                         skip(1);
 453                     }
 454                 }
 455 
 456                 if (atEOF()) {
 457                     // TODO - Report closing */ missing in parser.
 458                     add(ERROR, start);
 459                 } else {
 460                     // Skip */.
 461                     skip(2);
 462                 }
 463 
 464                 // Did detect a comment.
 465                 return true;
 466             }
 467         }
 468 
 469         if (scripting && ch0 == '#') {
 470             // shell style comment
 471             // Skip over #.
 472             skip(1);
 473             // Scan for EOL.
 474             while (!atEOF() && !isEOL(ch0)) {
 475                 skip(1);
 476             }
 477             // Did detect a comment.
 478             return true;
 479         }
 480 
 481         // Not a comment.
 482         return false;
 483     }
 484 
 485     /**
 486      * Convert a regex token to a token object.
 487      *
 488      * @param start  Position in source content.
 489      * @param length Length of regex token.
 490      * @return Regex token object.
 491      */
 492     public RegexToken valueOfPattern(final int start, final int length) {
 493         // Save the current position.
 494         final int savePosition = position;
 495         // Reset to beginning of content.
 496         reset(start);
 497         // Buffer for recording characters.
 498         final StringBuilder sb = new StringBuilder(length);
 499 
 500         // Skip /.
 501         skip(1);
 502         boolean inBrackets = false;
 503         // Scan for closing /, stopping at end of line.
 504         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
 505             // Skip over escaped character.
 506             if (ch0 == '\\') {
 507                 sb.append(ch0);
 508                 sb.append(ch1);
 509                 skip(2);
 510             } else {
 511                 if (ch0 == '[') {
 512                     inBrackets = true;
 513                 } else if (ch0 == ']') {
 514                     inBrackets = false;
 515                 }
 516 
 517                 // Skip literal character.
 518                 sb.append(ch0);
 519                 skip(1);
 520             }
 521         }
 522 
 523         // Get pattern as string.
 524         final String regex = sb.toString();
 525 
 526         // Skip /.
 527         skip(1);
 528 
 529         // Options as string.
 530         final String options = source.getString(position, scanIdentifier());
 531 
 532         reset(savePosition);
 533 
 534         // Compile the pattern.
 535         return new RegexToken(regex, options);
 536     }
 537 
 538     /**
 539      * Return true if the given token can be the beginning of a literal.
 540      *
 541      * @param token a token
 542      * @return true if token can start a literal.
 543      */
 544     public boolean canStartLiteral(final TokenType token) {
 545         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
 546     }
 547 
 548     /**
 549      * Check whether the given token represents the beginning of a literal. If so scan
 550      * the literal and return <tt>true</tt>, otherwise return false.
 551      *
 552      * @param token the token.
 553      * @param startTokenType the token type.
 554      * @return True if a literal beginning with startToken was found and scanned.
 555      */
 556     protected boolean scanLiteral(final long token, final TokenType startTokenType) {
 557         // Check if it can be a literal.
 558         if (!canStartLiteral(startTokenType)) {
 559             return false;
 560         }
 561         // We break on ambiguous tokens so if we already moved on it can't be a literal.
 562         if (stream.get(stream.last()) != token) {
 563             return false;
 564         }
 565         // Rewind to token start position
 566         reset(Token.descPosition(token));
 567 
 568         if (ch0 == '/') {
 569             return scanRegEx();
 570         } else if (ch0 == '<') {
 571             if (ch1 == '<') {
 572                 return scanHereString();
 573             } else if (Character.isJavaIdentifierStart(ch1)) {
 574                 return scanXMLLiteral();
 575             }
 576         }
 577 
 578         return false;
 579     }
 580 
 581     /**
 582      * Scan over regex literal.
 583      *
 584      * @return True if a regex literal.
 585      */
 586     private boolean scanRegEx() {
 587         assert ch0 == '/';
 588         // Make sure it's not a comment.
 589         if (ch1 != '/' && ch1 != '*') {
 590             // Record beginning of literal.
 591             final int start = position;
 592             // Skip /.
 593             skip(1);
 594             boolean inBrackets = false;
 595 
 596             // Scan for closing /, stopping at end of line.
 597             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
 598                 // Skip over escaped character.
 599                 if (ch0 == '\\') {
 600                     skip(1);
 601                     if (isEOL(ch0)) {
 602                         reset(start);
 603                         return false;
 604                     }
 605                     skip(1);
 606                 } else {
 607                     if (ch0 == '[') {
 608                         inBrackets = true;
 609                     } else if (ch0 == ']') {
 610                         inBrackets = false;
 611                     }
 612 
 613                     // Skip literal character.
 614                     skip(1);
 615                 }
 616             }
 617 
 618             // If regex literal.
 619             if (ch0 == '/') {
 620                 // Skip /.
 621                 skip(1);
 622 
 623                 // Skip over options.
 624                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
 625                     skip(1);
 626                 }
 627 
 628                 // Add regex token.
 629                 add(REGEX, start);
 630                 // Regex literal detected.
 631                 return true;
 632             }
 633 
 634             // False start try again.
 635             reset(start);
 636         }
 637 
 638         // Regex literal not detected.
 639         return false;
 640     }
 641 
 642     /**
 643      * Convert a digit to a integer.  Can't use Character.digit since we are
 644      * restricted to ASCII by the spec.
 645      *
 646      * @param ch   Character to convert.
 647      * @param base Numeric base.
 648      *
 649      * @return The converted digit or -1 if invalid.
 650      */
 651     protected static int convertDigit(final char ch, final int base) {
 652         int digit;
 653 
 654         if ('0' <= ch && ch <= '9') {
 655             digit = ch - '0';
 656         } else if ('A' <= ch && ch <= 'Z') {
 657             digit = ch - 'A' + 10;
 658         } else if ('a' <= ch && ch <= 'z') {
 659             digit = ch - 'a' + 10;
 660         } else {
 661             return -1;
 662         }
 663 
 664         return digit < base ? digit : -1;
 665     }
 666 
 667 
 668     /**
 669      * Get the value of a hexadecimal numeric sequence.
 670      *
 671      * @param length Number of digits.
 672      * @param type   Type of token to report against.
 673      * @return Value of sequence or < 0 if no digits.
 674      */
 675     private int hexSequence(final int length, final TokenType type) {
 676         int value = 0;
 677 
 678         for (int i = 0; i < length; i++) {
 679             final int digit = convertDigit(ch0, 16);
 680 
 681             if (digit == -1) {
 682                 error(Lexer.message("invalid.hex"), type, position, limit);
 683                 return i == 0 ? -1 : value;
 684             }
 685 
 686             value = digit | value << 4;
 687             skip(1);
 688         }
 689 
 690         return value;
 691     }
 692 
 693     /**
 694      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
 695      *
 696      * @return Value of sequence.
 697      */
 698     private int octalSequence() {
 699         int value = 0;
 700 
 701         for (int i = 0; i < 3; i++) {
 702             final int digit = convertDigit(ch0, 8);
 703 
 704             if (digit == -1) {
 705                 break;
 706             }
 707             value = digit | value << 3;
 708             skip(1);
 709 
 710             if (i == 1 && value >= 32) {
 711                 break;
 712             }
 713         }
 714         return value;
 715     }
 716 
 717     /**
 718      * Convert a string to a JavaScript identifier.
 719      *
 720      * @param start  Position in source content.
 721      * @param length Length of token.
 722      * @return Ident string or null if an error.
 723      */
 724     private String valueOfIdent(final int start, final int length) throws RuntimeException {
 725         // Save the current position.
 726         final int savePosition = position;
 727         // End of scan.
 728         final int end = start + length;
 729         // Reset to beginning of content.
 730         reset(start);
 731         // Buffer for recording characters.
 732         final StringBuilder sb = new StringBuilder(length);
 733 
 734         // Scan until end of line or end of file.
 735         while (!atEOF() && position < end && !isEOL(ch0)) {
 736             // If escape character.
 737             if (ch0 == '\\' && ch1 == 'u') {
 738                 skip(2);
 739                 final int ch = hexSequence(4, TokenType.IDENT);
 740                 if (isWhitespace((char)ch)) {
 741                     return null;
 742                 }
 743                 if (ch < 0) {
 744                     sb.append('\\');
 745                     sb.append('u');
 746                 } else {
 747                     sb.append((char)ch);
 748                 }
 749             } else {
 750                 // Add regular character.
 751                 sb.append(ch0);
 752                 skip(1);
 753             }
 754         }
 755 
 756         // Restore position.
 757         reset(savePosition);
 758 
 759         return sb.toString();
 760     }
 761 
 762     /**
 763      * Scan over and identifier or keyword. Handles identifiers containing
 764      * encoded Unicode chars.
 765      *
 766      * Example:
 767      *
 768      * var \u0042 = 44;
 769      */
 770     private void scanIdentifierOrKeyword() {
 771         // Record beginning of identifier.
 772         final int start = position;
 773         // Scan identifier.
 774         final int length = scanIdentifier();
 775         // Check to see if it is a keyword.
 776         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
 777         // Add keyword or identifier token.
 778         add(type, start);
 779     }
 780 
 781     /**
 782      * Convert a string to a JavaScript string object.
 783      *
 784      * @param start  Position in source content.
 785      * @param length Length of token.
 786      * @return JavaScript string object.
 787      */
 788     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
 789         // Save the current position.
 790         final int savePosition = position;
 791         // Calculate the end position.
 792         final int end = start + length;
 793         // Reset to beginning of string.
 794         reset(start);
 795 
 796         // Buffer for recording characters.
 797         final StringBuilder sb = new StringBuilder(length);
 798 
 799         // Scan until end of string.
 800         while (position < end) {
 801             // If escape character.
 802             if (ch0 == '\\') {
 803                 skip(1);
 804 
 805                 final char next = ch0;
 806                 final int afterSlash = position;
 807 
 808                 skip(1);
 809 
 810                 // Special characters.
 811                 switch (next) {
 812                 case '0':
 813                 case '1':
 814                 case '2':
 815                 case '3':
 816                 case '4':
 817                 case '5':
 818                 case '6':
 819                 case '7': {
 820                     if (strict) {
 821                         // "\0" itself is allowed in strict mode. Only other 'real'
 822                         // octal escape sequences are not allowed (eg. "\02", "\31").
 823                         // See section 7.8.4 String literals production EscapeSequence
 824                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
 825                             error(Lexer.message("strict.no.octal"), STRING, position, limit);
 826                         }
 827                     }
 828                     reset(afterSlash);
 829                     // Octal sequence.
 830                     final int ch = octalSequence();
 831 
 832                     if (ch < 0) {
 833                         sb.append('\\');
 834                         sb.append('x');
 835                     } else {
 836                         sb.append((char)ch);
 837                     }
 838                     break;
 839                 }
 840                 case 'n':
 841                     sb.append('\n');
 842                     break;
 843                 case 't':
 844                     sb.append('\t');
 845                     break;
 846                 case 'b':
 847                     sb.append('\b');
 848                     break;
 849                 case 'f':
 850                     sb.append('\f');
 851                     break;
 852                 case 'r':
 853                     sb.append('\r');
 854                     break;
 855                 case '\'':
 856                     sb.append('\'');
 857                     break;
 858                 case '\"':
 859                     sb.append('\"');
 860                     break;
 861                 case '\\':
 862                     sb.append('\\');
 863                     break;
 864                 case '\r': // CR | CRLF
 865                     if (ch0 == '\n') {
 866                         skip(1);
 867                     }
 868                     // fall through
 869                 case '\n': // LF
 870                 case '\u2028': // LS
 871                 case '\u2029': // PS
 872                     // continue on the next line, slash-return continues string
 873                     // literal
 874                     break;
 875                 case 'x': {
 876                     // Hex sequence.
 877                     final int ch = hexSequence(2, STRING);
 878 
 879                     if (ch < 0) {
 880                         sb.append('\\');
 881                         sb.append('x');
 882                     } else {
 883                         sb.append((char)ch);
 884                     }
 885                 }
 886                     break;
 887                 case 'u': {
 888                     // Unicode sequence.
 889                     final int ch = hexSequence(4, STRING);
 890 
 891                     if (ch < 0) {
 892                         sb.append('\\');
 893                         sb.append('u');
 894                     } else {
 895                         sb.append((char)ch);
 896                     }
 897                 }
 898                     break;
 899                 case 'v':
 900                     sb.append('\u000B');
 901                     break;
 902                 // All other characters.
 903                 default:
 904                     sb.append(next);
 905                     break;
 906                 }
 907             } else {
 908                 // Add regular character.
 909                 sb.append(ch0);
 910                 skip(1);
 911             }
 912         }
 913 
 914         // Restore position.
 915         reset(savePosition);
 916 
 917         return sb.toString();
 918     }
 919 
 920     /**
 921      * Scan over a string literal.
 922      */
 923     protected void scanString(final boolean add) {
 924         // Type of string.
 925         TokenType type = STRING;
 926         // Record starting quote.
 927         final char quote = ch0;
 928         // Skip over quote.
 929         skip(1);
 930 
 931         // Record beginning of string content.
 932         final State stringState = saveState();
 933 
 934         // Scan until close quote or end of line.
 935         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
 936             // Skip over escaped character.
 937             if (ch0 == '\\') {
 938                 type = ESCSTRING;
 939                 skip(1);
 940                 if (! isEscapeCharacter(ch0)) {
 941                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);
 942                 }
 943                 if (isEOL(ch0)) {
 944                     // Multiline string literal
 945                     skipEOL(false);
 946                     continue;
 947                 }
 948             }
 949             // Skip literal character.
 950             skip(1);
 951         }
 952 
 953         // If close quote.
 954         if (ch0 == quote) {
 955             // Skip close quote.
 956             skip(1);
 957         } else {
 958             error(Lexer.message("missing.close.quote"), STRING, position, limit);
 959         }
 960 
 961         // If not just scanning.
 962         if (add) {
 963             // Record end of string.
 964             stringState.setLimit(position - 1);
 965 
 966             if (scripting && !stringState.isEmpty()) {
 967                 switch (quote) {
 968                 case '`':
 969                     // Mark the beginning of an exec string.
 970                     add(EXECSTRING, stringState.position, stringState.limit);
 971                     // Frame edit string with left brace.
 972                     add(LBRACE, stringState.position, stringState.position);
 973                     // Process edit string.
 974                     editString(type, stringState);
 975                     // Frame edit string with right brace.
 976                     add(RBRACE, stringState.limit, stringState.limit);
 977                     break;
 978                 case '"':
 979                     // Only edit double quoted strings.
 980                     editString(type, stringState);
 981                     break;
 982                 case '\'':
 983                     // Add string token without editing.
 984                     add(type, stringState.position, stringState.limit);
 985                     break;
 986                 default:
 987                     break;
 988                 }
 989             } else {
 990                 /// Add string token without editing.
 991                 add(type, stringState.position, stringState.limit);
 992             }
 993         }
 994     }
 995 
 996     /**
 997      * Is the given character a valid escape char after "\" ?
 998      *
 999      * @param ch character to be checked
1000      * @return if the given character is valid after "\"
1001      */
1002     protected boolean isEscapeCharacter(final char ch) {
1003         return true;
1004     }
1005 
1006     /**
1007      * Convert string to number.
1008      *
1009      * @param valueString  String to convert.
1010      * @param radix        Numeric base.
1011      * @return Converted number.
1012      */
1013     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1014         try {
1015             final long value = Long.parseLong(valueString, radix);
1016             if(value >= MIN_INT_L && value <= MAX_INT_L) {
1017                 return Integer.valueOf((int)value);
1018             }
1019             return Long.valueOf(value);
1020         } catch (final NumberFormatException e) {
1021             if (radix == 10) {
1022                 return Double.valueOf(valueString);
1023             }
1024 
1025             double value = 0.0;
1026 
1027             for (int i = 0; i < valueString.length(); i++) {
1028                 final char ch = valueString.charAt(i);
1029                 // Preverified, should always be a valid digit.
1030                 final int digit = convertDigit(ch, radix);
1031                 value *= radix;
1032                 value += digit;
1033             }
1034 
1035             return value;
1036         }
1037     }
1038 
1039     /**
1040      * Convert string to number.
1041      *
1042      * @param valueString String to convert.
1043      * @return Converted number.
1044      */
1045     private static Number valueOf(final String valueString) throws NumberFormatException {
1046         return JSType.narrowestIntegerRepresentation(Double.valueOf(valueString));
1047     }
1048 
1049     /**
1050      * Scan a number.
1051      */
1052     protected void scanNumber() {
1053         // Record beginning of number.
1054         final int start = position;
1055         // Assume value is a decimal.
1056         TokenType type = DECIMAL;
1057 
1058         // First digit of number.
1059         int digit = convertDigit(ch0, 10);
1060 
1061         // If number begins with 0x.
1062         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1063             // Skip over 0xN.
1064             skip(3);
1065             // Skip over remaining digits.
1066             while (convertDigit(ch0, 16) != -1) {
1067                 skip(1);
1068             }
1069 
1070             type = HEXADECIMAL;
1071         } else {
1072             // Check for possible octal constant.
1073             boolean octal = digit == 0;
1074             // Skip first digit if not leading '.'.
1075             if (digit != -1) {
1076                 skip(1);
1077             }
1078 
1079             // Skip remaining digits.
1080             while ((digit = convertDigit(ch0, 10)) != -1) {
1081                 // Check octal only digits.
1082                 octal = octal && digit < 8;
1083                 // Skip digit.
1084                 skip(1);
1085             }
1086 
1087             if (octal && position - start > 1) {
1088                 type = OCTAL;
1089             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1090                 // Must be a double.
1091                 if (ch0 == '.') {
1092                     // Skip period.
1093                     skip(1);
1094                     // Skip mantissa.
1095                     while (convertDigit(ch0, 10) != -1) {
1096                         skip(1);
1097                     }
1098                 }
1099 
1100                 // Detect exponent.
1101                 if (ch0 == 'E' || ch0 == 'e') {
1102                     // Skip E.
1103                     skip(1);
1104                     // Detect and skip exponent sign.
1105                     if (ch0 == '+' || ch0 == '-') {
1106                         skip(1);
1107                     }
1108                     // Skip exponent.
1109                     while (convertDigit(ch0, 10) != -1) {
1110                         skip(1);
1111                     }
1112                 }
1113 
1114                 type = FLOATING;
1115             }
1116         }
1117 
1118         if (Character.isJavaIdentifierStart(ch0)) {
1119             error(Lexer.message("missing.space.after.number"), type, position, 1);
1120         }
1121 
1122         // Add number token.
1123         add(type, start);
1124     }
1125 
1126     /**
1127      * Convert a regex token to a token object.
1128      *
1129      * @param start  Position in source content.
1130      * @param length Length of regex token.
1131      * @return Regex token object.
1132      */
1133     XMLToken valueOfXML(final int start, final int length) {
1134         return new XMLToken(source.getString(start, length));
1135     }
1136 
1137     /**
1138      * Scan over a XML token.
1139      *
1140      * @return TRUE if is an XML literal.
1141      */
1142     private boolean scanXMLLiteral() {
1143         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1144         if (XML_LITERALS) {
1145             // Record beginning of xml expression.
1146             final int start = position;
1147 
1148             int openCount = 0;
1149 
1150             do {
1151                 if (ch0 == '<') {
1152                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1153                         skip(3);
1154                         openCount--;
1155                     } else if (Character.isJavaIdentifierStart(ch1)) {
1156                         skip(2);
1157                         openCount++;
1158                     } else if (ch1 == '?') {
1159                         skip(2);
1160                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1161                         skip(4);
1162                     } else {
1163                         reset(start);
1164                         return false;
1165                     }
1166 
1167                     while (!atEOF() && ch0 != '>') {
1168                         if (ch0 == '/' && ch1 == '>') {
1169                             openCount--;
1170                             skip(1);
1171                             break;
1172                         } else if (ch0 == '\"' || ch0 == '\'') {
1173                             scanString(false);
1174                         } else {
1175                             skip(1);
1176                         }
1177                     }
1178 
1179                     if (ch0 != '>') {
1180                         reset(start);
1181                         return false;
1182                     }
1183 
1184                     skip(1);
1185                 } else if (atEOF()) {
1186                     reset(start);
1187                     return false;
1188                 } else {
1189                     skip(1);
1190                 }
1191             } while (openCount > 0);
1192 
1193             add(XML, start);
1194             return true;
1195         }
1196 
1197         return false;
1198     }
1199 
1200     /**
1201      * Scan over identifier characters.
1202      *
1203      * @return Length of identifier or zero if none found.
1204      */
1205     private int scanIdentifier() {
1206         final int start = position;
1207 
1208         // Make sure first character is valid start character.
1209         if (ch0 == '\\' && ch1 == 'u') {
1210             skip(2);
1211             final int ch = hexSequence(4, TokenType.IDENT);
1212 
1213             if (!Character.isJavaIdentifierStart(ch)) {
1214                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1215             }
1216         } else if (!Character.isJavaIdentifierStart(ch0)) {
1217             // Not an identifier.
1218             return 0;
1219         }
1220 
1221         // Make sure remaining characters are valid part characters.
1222         while (!atEOF()) {
1223             if (ch0 == '\\' && ch1 == 'u') {
1224                 skip(2);
1225                 final int ch = hexSequence(4, TokenType.IDENT);
1226 
1227                 if (!Character.isJavaIdentifierPart(ch)) {
1228                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1229                 }
1230             } else if (Character.isJavaIdentifierPart(ch0)) {
1231                 skip(1);
1232             } else {
1233                 break;
1234             }
1235         }
1236 
1237         // Length of identifier sequence.
1238         return position - start;
1239     }
1240 
1241     /**
1242      * Compare two identifiers (in content) for equality.
1243      *
1244      * @param aStart  Start of first identifier.
1245      * @param aLength Length of first identifier.
1246      * @param bStart  Start of second identifier.
1247      * @param bLength Length of second identifier.
1248      * @return True if equal.
1249      */
1250     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1251         if (aLength == bLength) {
1252             for (int i = 0; i < aLength; i++) {
1253                 if (content[aStart + i] != content[bStart + i]) {
1254                     return false;
1255                 }
1256             }
1257 
1258             return true;
1259         }
1260 
1261         return false;
1262     }
1263 
1264     /**
1265      * Detect if a line starts with a marker identifier.
1266      *
1267      * @param identStart  Start of identifier.
1268      * @param identLength Length of identifier.
1269      * @return True if detected.
1270      */
1271     private boolean hasHereMarker(final int identStart, final int identLength) {
1272         // Skip any whitespace.
1273         skipWhitespace(false);
1274 
1275         return identifierEqual(identStart, identLength, position, scanIdentifier());
1276     }
1277 
1278     /**
1279      * Lexer to service edit strings.
1280      */
1281     private static class EditStringLexer extends Lexer {
1282         /** Type of string literals to emit. */
1283         final TokenType stringType;
1284 
1285         /*
1286          * Constructor.
1287          */
1288 
1289         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1290             super(lexer, stringState);
1291 
1292             this.stringType = stringType;
1293         }
1294 
1295         /**
1296          * Lexify the contents of the string.
1297          */
1298         @Override
1299         public void lexify() {
1300             // Record start of string position.
1301             int stringStart = position;
1302             // Indicate that the priming first string has not been emitted.
1303             boolean primed = false;
1304 
1305             while (true) {
1306                 // Detect end of content.
1307                 if (atEOF()) {
1308                     break;
1309                 }
1310 
1311                 // Honour escapes (should be well formed.)
1312                 if (ch0 == '\\' && stringType == ESCSTRING) {
1313                     skip(2);
1314 
1315                     continue;
1316                 }
1317 
1318                 // If start of expression.
1319                 if (ch0 == '$' && ch1 == '{') {
1320                     if (!primed || stringStart != position) {
1321                         if (primed) {
1322                             add(ADD, stringStart, stringStart + 1);
1323                         }
1324 
1325                         add(stringType, stringStart, position);
1326                         primed = true;
1327                     }
1328 
1329                     // Skip ${
1330                     skip(2);
1331 
1332                     // Save expression state.
1333                     final State expressionState = saveState();
1334 
1335                     // Start with one open brace.
1336                     int braceCount = 1;
1337 
1338                     // Scan for the rest of the string.
1339                     while (!atEOF()) {
1340                         // If closing brace.
1341                         if (ch0 == '}') {
1342                             // Break only only if matching brace.
1343                             if (--braceCount == 0) {
1344                                 break;
1345                             }
1346                         } else if (ch0 == '{') {
1347                             // Bump up the brace count.
1348                             braceCount++;
1349                         }
1350 
1351                         // Skip to next character.
1352                         skip(1);
1353                     }
1354 
1355                     // If braces don't match then report an error.
1356                     if (braceCount != 0) {
1357                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1358                     }
1359 
1360                     // Mark end of expression.
1361                     expressionState.setLimit(position);
1362                     // Skip closing brace.
1363                     skip(1);
1364 
1365                     // Start next string.
1366                     stringStart = position;
1367 
1368                     // Concatenate expression.
1369                     add(ADD, expressionState.position, expressionState.position + 1);
1370                     add(LPAREN, expressionState.position, expressionState.position + 1);
1371 
1372                     // Scan expression.
1373                     final Lexer lexer = new Lexer(this, expressionState);
1374                     lexer.lexify();
1375 
1376                     // Close out expression parenthesis.
1377                     add(RPAREN, position - 1, position);
1378 
1379                     continue;
1380                 }
1381 
1382                 // Next character in string.
1383                 skip(1);
1384             }
1385 
1386             // If there is any unemitted string portion.
1387             if (stringStart != limit) {
1388                 // Concatenate remaining string.
1389                 if (primed) {
1390                     add(ADD, stringStart, 1);
1391                 }
1392 
1393                 add(stringType, stringStart, limit);
1394             }
1395         }
1396 
1397     }
1398 
1399     /**
1400      * Edit string for nested expressions.
1401      *
1402      * @param stringType  Type of string literals to emit.
1403      * @param stringState State of lexer at start of string.
1404      */
1405     private void editString(final TokenType stringType, final State stringState) {
1406         // Use special lexer to scan string.
1407         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1408         lexer.lexify();
1409 
1410         // Need to keep lexer informed.
1411         last = stringType;
1412     }
1413 
1414     /**
1415      * Scan over a here string.
1416      *
1417      * @return TRUE if is a here string.
1418      */
1419     private boolean scanHereString() {
1420         assert ch0 == '<' && ch1 == '<';
1421         if (scripting) {
1422             // Record beginning of here string.
1423             final State saved = saveState();
1424 
1425             // << or <<<
1426             final boolean excludeLastEOL = ch2 != '<';
1427 
1428             if (excludeLastEOL) {
1429                 skip(2);
1430             } else {
1431                 skip(3);
1432             }
1433 
1434             // Scan identifier.
1435             final int identStart = position;
1436             final int identLength = scanIdentifier();
1437 
1438             // Check for identifier.
1439             if (identLength == 0) {
1440                 // Treat as shift.
1441                 restoreState(saved);
1442 
1443                 return false;
1444             }
1445 
1446             // Record rest of line.
1447             final State restState = saveState();
1448             skipLine(false);
1449             restState.setLimit(position);
1450 
1451             // Record beginning of string.
1452             final State stringState = saveState();
1453             int stringEnd = position;
1454 
1455             // Hunt down marker.
1456             while (!atEOF()) {
1457                 // Skip any whitespace.
1458                 skipWhitespace(false);
1459 
1460                 if (hasHereMarker(identStart, identLength)) {
1461                     break;
1462                 }
1463 
1464                 skipLine(false);
1465                 stringEnd = position;
1466             }
1467 
1468             // Record end of string.
1469             stringState.setLimit(stringEnd);
1470 
1471             // If marker is missing.
1472             if (stringState.isEmpty() || atEOF()) {
1473                 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1474                 restoreState(saved);
1475 
1476                 return false;
1477             }
1478 
1479             // Remove last end of line if specified.
1480             if (excludeLastEOL) {
1481                 // Handles \n.
1482                 if (content[stringEnd - 1] == '\n') {
1483                     stringEnd--;
1484                 }
1485 
1486                 // Handles \r and \r\n.
1487                 if (content[stringEnd - 1] == '\r') {
1488                     stringEnd--;
1489                 }
1490 
1491                 // Update end of string.
1492                 stringState.setLimit(stringEnd);
1493             }
1494 
1495             // Edit string if appropriate.
1496             if (scripting && !stringState.isEmpty()) {
1497                 editString(STRING, stringState);
1498             } else {
1499                 // Add here string.
1500                 add(STRING, stringState.position, stringState.limit);
1501             }
1502 
1503             // Scan rest of original line.
1504             final Lexer restLexer = new Lexer(this, restState);
1505 
1506             restLexer.lexify();
1507 
1508             return true;
1509         }
1510 
1511         return false;
1512     }
1513 
1514     /**
1515      * Breaks source content down into lex units, adding tokens to the token
1516      * stream. The routine scans until the stream buffer is full. Can be called
1517      * repeatedly until EOF is detected.
1518      */
1519     public void lexify() {
1520         while (!stream.isFull() || nested) {
1521             // Skip over whitespace.
1522             skipWhitespace(true);
1523 
1524             // Detect end of file.
1525             if (atEOF()) {
1526                 if (!nested) {
1527                     // Add an EOF token at the end.
1528                     add(EOF, position);
1529                 }
1530 
1531                 break;
1532             }
1533 
1534             // Check for comments. Note that we don't scan for regexp and other literals here as
1535             // we may not have enough context to distinguish them from similar looking operators.
1536             // Instead we break on ambiguous operators below and let the parser decide.
1537             if (ch0 == '/' && skipComments()) {
1538                 continue;
1539             }
1540 
1541             if (scripting && ch0 == '#' && skipComments()) {
1542                 continue;
1543             }
1544 
1545             // TokenType for lookup of delimiter or operator.
1546             TokenType type;
1547 
1548             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1549                 // '.' followed by digit.
1550                 // Scan and add a number.
1551                 scanNumber();
1552             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1553                 // Get the number of characters in the token.
1554                 final int typeLength = type.getLength();
1555                 // Skip that many characters.
1556                 skip(typeLength);
1557                 // Add operator token.
1558                 add(type, position - typeLength);
1559                 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1560                 // We break to let the parser decide what it is.
1561                 if (canStartLiteral(type)) {
1562                     break;
1563                 }
1564             } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1565                 // Scan and add identifier or keyword.
1566                 scanIdentifierOrKeyword();
1567             } else if (isStringDelimiter(ch0)) {
1568                 // Scan and add a string.
1569                 scanString(true);
1570             } else if (Character.isDigit(ch0)) {
1571                 // Scan and add a number.
1572                 scanNumber();
1573             } else {
1574                 // Don't recognize this character.
1575                 skip(1);
1576                 add(ERROR, position - 1);
1577             }
1578         }
1579     }
1580 
1581     /**
1582      * Return value of token given its token descriptor.
1583      *
1584      * @param token  Token descriptor.
1585      * @return JavaScript value.
1586      */
1587     Object getValueOf(final long token, final boolean strict) {
1588         final int start = Token.descPosition(token);
1589         final int len = Token.descLength(token);
1590 
1591         switch (Token.descType(token)) {
1592         case DECIMAL:
1593             return Lexer.valueOf(source.getString(start, len), 10); // number
1594         case OCTAL:
1595             return Lexer.valueOf(source.getString(start, len), 8); // number
1596         case HEXADECIMAL:
1597             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1598         case FLOATING:
1599             return Lexer.valueOf(source.getString(start, len)); // number
1600         case STRING:
1601             return source.getString(start, len); // String
1602         case ESCSTRING:
1603             return valueOfString(start, len, strict); // String
1604         case IDENT:
1605             return valueOfIdent(start, len); // String
1606         case REGEX:
1607             return valueOfPattern(start, len); // RegexToken::LexerToken
1608         case XML:
1609             return valueOfXML(start, len); // XMLToken::LexerToken
1610         default:
1611             break;
1612         }
1613 
1614         return null;
1615     }
1616 
1617     protected static String message(final String msgId, final String... args) {
1618         return ECMAErrors.getMessage("lexer.error." + msgId, args);
1619     }
1620 
1621     /**
1622      * Generate a runtime exception
1623      *
1624      * @param message       error message
1625      * @param type          token type
1626      * @param start         start position of lexed error
1627      * @param length        length of lexed error
1628      * @throws ParserException  unconditionally
1629      */
1630     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1631         final long token     = Token.toDesc(type, start, length);
1632         final int  pos       = Token.descPosition(token);
1633         final int  lineNum   = source.getLine(pos);
1634         final int  columnNum = source.getColumn(pos);
1635         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1636         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1637     }
1638 
1639     /**
1640      * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1641      * This is the abstract superclass
1642      */
1643     public static abstract class LexerToken {
1644         private final String expression;
1645 
1646         /**
1647          * Constructor
1648          * @param expression token expression
1649          */
1650         protected LexerToken(final String expression) {
1651             this.expression = expression;
1652         }
1653 
1654         /**
1655          * Get the expression
1656          * @return expression
1657          */
1658         public String getExpression() {
1659             return expression;
1660         }
1661     }
1662 
1663     /**
1664      * Temporary container for regular expressions.
1665      */
1666     public static class RegexToken extends LexerToken {
1667         /** Options. */
1668         private final String options;
1669 
1670         /**
1671          * Constructor.
1672          *
1673          * @param expression  regexp expression
1674          * @param options     regexp options
1675          */
1676         public RegexToken(final String expression, final String options) {
1677             super(expression);
1678             this.options = options;
1679         }
1680 
1681         /**
1682          * Get regexp options
1683          * @return options
1684          */
1685         public String getOptions() {
1686             return options;
1687         }
1688 
1689         @Override
1690         public String toString() {
1691             return '/' + getExpression() + '/' + options;
1692         }
1693     }
1694 
1695     /**
1696      * Temporary container for XML expression.
1697      */
1698     public static class XMLToken extends LexerToken {
1699 
1700         /**
1701          * Constructor.
1702          *
1703          * @param expression  XML expression
1704          */
1705         public XMLToken(final String expression) {
1706             super(expression);
1707         }
1708     }
1709 }