1 /*
   2  * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.ADD;
  29 import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER;
  30 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
  31 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
  32 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
  33 import static jdk.nashorn.internal.parser.TokenType.EOF;
  34 import static jdk.nashorn.internal.parser.TokenType.EOL;
  35 import static jdk.nashorn.internal.parser.TokenType.ERROR;
  36 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  37 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
  38 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
  39 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
  40 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
  41 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
  42 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
  43 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
  44 import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
  45 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  46 import static jdk.nashorn.internal.parser.TokenType.REGEX;
  47 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
  48 import static jdk.nashorn.internal.parser.TokenType.STRING;
  49 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
  50 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
  51 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
  52 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
  53 import static jdk.nashorn.internal.parser.TokenType.XML;
  54 
  55 import java.io.Serializable;
  56 
  57 import jdk.nashorn.internal.runtime.ECMAErrors;
  58 import jdk.nashorn.internal.runtime.ErrorManager;
  59 import jdk.nashorn.internal.runtime.JSErrorType;
  60 import jdk.nashorn.internal.runtime.JSType;
  61 import jdk.nashorn.internal.runtime.ParserException;
  62 import jdk.nashorn.internal.runtime.Source;
  63 import jdk.nashorn.internal.runtime.options.Options;
  64 
  65 /**
  66  * Responsible for converting source content into a stream of tokens.
  67  *
  68  */
  69 @SuppressWarnings("fallthrough")
  70 public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} widened to {@code long}. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} widened to {@code long}. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /**
     * Value of the {@code nashorn.lexer.xmlliterals} option. When set, a token
     * starting with '<' may begin a literal even outside scripting mode
     * (see {@link #canStartLiteral(TokenType)}).
     */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens. */
    private final TokenStream stream;

    /**
     * True if scripting mode is enabled. In this class it lets a token starting
     * with '<' begin a literal and is asserted when a '#' comment is scanned.
     */
    private final boolean scripting;

    /** True if parsing in ECMAScript 6 mode. */
    private final boolean es6;

    /**
     * True if a nested scan (scan to completion, no EOF). Set only by the
     * constructor that copies another lexer's state.
     */
    private final boolean nested;

    /**
     * Line number carried by the EOL token that is waiting to be written to the
     * stream, or -1 when no EOL is pending (see {@link #add(TokenType, int, int)}).
     */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** True if the lexer should pause when it reaches a function body (constructor argument). */
    private final boolean pauseOnFunctionBody;
    /** Set once a {@code function} keyword has been scanned while {@link #pauseOnFunctionBody} is enabled. */
    private boolean pauseOnNextLeftBrace;

    // NOTE(review): not referenced in this part of the file; presumably counts unclosed
    // '{' inside template literal expressions - confirm against the template scanning code.
    private int templateExpressionOpenBraces;
 104 
    /**
     * Whitespace characters at or above U+00A0 that {@link #isJSWhitespace(char)}
     * accepts in addition to the ASCII ones. The string is used as a lookup set
     * (membership is tested with {@code indexOf}). It includes the line and
     * paragraph separators, so those count as whitespace as well as end of line.
     */
    private static final String JAVASCRIPT_OTHER_WHITESPACE =
        "\u2028" + // line separator
        "\u2029" + // paragraph separator
        "\u00a0" + // Latin-1 space
        "\u1680" + // Ogham space mark
        "\u180e" + // separator, Mongolian vowel
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;
 127 
    /**
     * The JavaScript whitespace and line terminator code points spelled out as
     * textual escape sequences: the backslash in each literal is itself escaped,
     * so the string holds the six-character escape text rather than the character.
     * Returned unchanged by {@link #getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // tabulation line
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // separator, Mongolian vowel
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
 156 
 157     static String unicodeEscape(final char ch) {
 158         final StringBuilder sb = new StringBuilder();
 159 
 160         sb.append("\\u");
 161 
 162         final String hex = Integer.toHexString(ch);
 163         for (int i = hex.length(); i < 4; i++) {
 164             sb.append('0');
 165         }
 166         sb.append(hex);
 167 
 168         return sb.toString();
 169     }
 170 
 171     /**
 172      * Constructor
 173      *
 174      * @param source    the source
 175      * @param stream    the token stream to lex
 176      */
    public Lexer(final Source source, final TokenStream stream) {
        // Delegate with both the scripting flag and the ES6 flag switched off.
        this(source, stream, false, false);
    }
 180 
 181     /**
 182      * Constructor
 183      *
 184      * @param source    the source
 185      * @param stream    the token stream to lex
 186      * @param scripting are we in scripting mode
 187      * @param es6       are we in ECMAScript 6 mode
 188      */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) {
        // Lex the whole source (offset 0, full length) and never pause on function bodies.
        this(source, 0, source.getLength(), stream, scripting, es6, false);
    }
 192 
 193     /**
 194      * Constructor
 195      *
 196      * @param source    the source
 197      * @param start     start position in source from which to start lexing
 198      * @param len       length of source segment to lex
 199      * @param stream    token stream to lex
 200      * @param scripting are we in scripting mode
 201      * @param es6       are we in ECMAScript 6 mode
 202      * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
 203      * function body. This is used with the feature where the parser is skipping nested function bodies to
 204      * avoid reading ahead unnecessarily when we skip the function bodies.
 205      */
 206     public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) {
 207         super(source.getContent(), 1, start, len);
 208         this.source      = source;
 209         this.stream      = stream;
 210         this.scripting   = scripting;
 211         this.es6         = es6;
 212         this.nested      = false;
 213         this.pendingLine = 1;
 214         this.last        = EOL;
 215 
 216         this.pauseOnFunctionBody = pauseOnFunctionBody;
 217     }
 218 
 219     private Lexer(final Lexer lexer, final State state) {
 220         super(lexer, state);
 221 
 222         source = lexer.source;
 223         stream = lexer.stream;
 224         scripting = lexer.scripting;
 225         es6 = lexer.es6;
 226         nested = true;
 227 
 228         pendingLine = state.pendingLine;
 229         linePosition = state.linePosition;
 230         last = EOL;
 231         pauseOnFunctionBody = false;
 232     }
 233 
 234     static class State extends Scanner.State {
 235         /** Pending new line number and position. */
 236         public final int pendingLine;
 237 
 238         /** Position of last EOL + 1. */
 239         public final int linePosition;
 240 
 241         /** Type of last token added. */
 242         public final TokenType last;
 243 
 244         /*
 245          * Constructor.
 246          */
 247 
 248         State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
 249             super(position, limit, line);
 250 
 251             this.pendingLine = pendingLine;
 252             this.linePosition = linePosition;
 253             this.last = last;
 254         }
 255     }
 256 
 257     /**
 258      * Save the state of the scan.
 259      *
 260      * @return Captured state.
 261      */
 262     @Override
 263     State saveState() {
 264         return new State(position, limit, line, pendingLine, linePosition, last);
 265     }
 266 
 267     /**
 268      * Restore the state of the scan.
 269      *
 270      * @param state
 271      *            Captured state.
 272      */
 273     void restoreState(final State state) {
 274         super.restoreState(state);
 275 
 276         pendingLine = state.pendingLine;
 277         linePosition = state.linePosition;
 278         last = state.last;
 279     }
 280 
 281     /**
 282      * Add a new token to the stream.
 283      *
 284      * @param type
 285      *            Token type.
 286      * @param start
 287      *            Start position.
 288      * @param end
 289      *            End position.
 290      */
 291     protected void add(final TokenType type, final int start, final int end) {
 292         // Record last token.
 293         last = type;
 294 
 295         // Only emit the last EOL in a cluster.
 296         if (type == EOL) {
 297             pendingLine = end;
 298             linePosition = start;
 299         } else {
 300             // Write any pending EOL to stream.
 301             if (pendingLine != -1) {
 302                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
 303                 pendingLine = -1;
 304             }
 305 
 306             // Write token to stream.
 307             stream.put(Token.toDesc(type, start, end - start));
 308         }
 309     }
 310 
 311     /**
 312      * Add a new token to the stream.
 313      *
 314      * @param type
 315      *            Token type.
 316      * @param start
 317      *            Start position.
 318      */
    protected void add(final TokenType type, final int start) {
        // The token ends at the current scan position.
        add(type, start, position);
    }
 322 
 323     /**
 324      * Return the String of valid whitespace characters for regular
 325      * expressions in JavaScript
 326      * @return regexp whitespace string
 327      */
    public static String getWhitespaceRegExp() {
        // The constant holds escape text (backslash, u, four hex digits), not the characters themselves.
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
 331 
 332     /**
 333      * Skip end of line.
 334      *
 335      * @param addEOL true if EOL token should be recorded.
 336      */
 337     private void skipEOL(final boolean addEOL) {
 338 
 339         if (ch0 == '\r') { // detect \r\n pattern
 340             skip(1);
 341             if (ch0 == '\n') {
 342                 skip(1);
 343             }
 344         } else { // all other space, ch0 is guaranteed to be EOL or \0
 345             skip(1);
 346         }
 347 
 348         // bump up line count
 349         line++;
 350 
 351         if (addEOL) {
 352             // Add an EOL token.
 353             add(EOL, position, line);
 354         }
 355     }
 356 
 357     /**
 358      * Skip over rest of line including end of line.
 359      *
 360      * @param addEOL true if EOL token should be recorded.
 361      */
 362     private void skipLine(final boolean addEOL) {
 363         // Ignore characters.
 364         while (!isEOL(ch0) && !atEOF()) {
 365             skip(1);
 366         }
 367         // Skip over end of line.
 368         skipEOL(addEOL);
 369     }
 370 
 371     /**
 372      * Test whether a char is valid JavaScript whitespace
 373      * @param ch a char
 374      * @return true if valid JavaScript whitespace
 375      */
 376     public static boolean isJSWhitespace(final char ch) {
 377         return ch == ' '                  // space
 378             || ch >= '\t' && ch <= '\r'   // 0x09..0x0d: tab, line feed, tabulation line, ff, carriage return
 379             || ch >= 160 && isOtherJSWhitespace(ch);
 380     }
 381 
 382     private static boolean isOtherJSWhitespace(final char ch) {
 383         return JAVASCRIPT_OTHER_WHITESPACE.indexOf(ch) != -1;
 384     }
 385 
 386     /**
 387      * Test whether a char is valid JavaScript end of line
 388      * @param ch a char
 389      * @return true if valid JavaScript end of line
 390      */
 391     public static boolean isJSEOL(final char ch) {
 392         return ch == '\n'      // line feed
 393             || ch == '\r'      // carriage return (ctrl-m)
 394             || ch == '\u2028'  // line separator
 395             || ch == '\u2029'; // paragraph separator
 396     }
 397 
 398     /**
     * Test if char is a string delimiter, i.e. a single quote (') or a double quote (").
 400      * @param ch a char
 401      * @return true if string delimiter
 402      */
 403     protected boolean isStringDelimiter(final char ch) {
 404         return ch == '\'' || ch == '"';
 405     }
 406 
 407     /**
 408      * Test if char is a template literal delimiter ('`').
 409      */
 410     private static boolean isTemplateDelimiter(final char ch) {
 411         return ch == '`';
 412     }
 413 
 414     /**
 415      * Test whether a char is valid JavaScript whitespace
 416      * @param ch a char
 417      * @return true if valid JavaScript whitespace
 418      */
    protected boolean isWhitespace(final char ch) {
        // Instance-level hook that delegates to the static JavaScript whitespace test.
        return Lexer.isJSWhitespace(ch);
    }
 422 
 423     /**
 424      * Test whether a char is valid JavaScript end of line
 425      * @param ch a char
 426      * @return true if valid JavaScript end of line
 427      */
    protected boolean isEOL(final char ch) {
        // Instance-level hook that delegates to the static JavaScript end of line test.
        return Lexer.isJSEOL(ch);
    }
 431 
 432     /**
 433      * Skip over whitespace and detect end of line, adding EOL tokens if
 434      * encountered.
 435      *
 436      * @param addEOL true if EOL tokens should be recorded.
 437      */
 438     private void skipWhitespace(final boolean addEOL) {
 439         while (isWhitespace(ch0)) {
 440             if (isEOL(ch0)) {
 441                 skipEOL(addEOL);
 442             } else {
 443                 skip(1);
 444             }
 445         }
 446     }
 447 
 448     /**
 449      * Skip over comments.
 450      *
 451      * @return True if a comment.
 452      */
 453     protected boolean skipComments() {
 454         // Save the current position.
 455         final int start = position;
 456 
 457         if (ch0 == '/') {
 458             // Is it a // comment.
 459             if (ch1 == '/') {
 460                 // Skip over //.
 461                 skip(2);
 462 
 463                 boolean directiveComment = false;
 464                 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
 465                     directiveComment = true;
 466                 }
 467 
 468                 // Scan for EOL.
 469                 while (!atEOF() && !isEOL(ch0)) {
 470                     skip(1);
 471                 }
 472                 // Did detect a comment.
 473                 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
 474                 return true;
 475             } else if (ch1 == '*') {
 476                 // Skip over /*.
 477                 skip(2);
 478                 // Scan for */.
 479                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
 480                     // If end of line handle else skip character.
 481                     if (isEOL(ch0)) {
 482                         skipEOL(true);
 483                     } else {
 484                         skip(1);
 485                     }
 486                 }
 487 
 488                 if (atEOF()) {
 489                     // TODO - Report closing */ missing in parser.
 490                     add(ERROR, start);
 491                 } else {
 492                     // Skip */.
 493                     skip(2);
 494                 }
 495 
 496                 // Did detect a comment.
 497                 add(COMMENT, start);
 498                 return true;
 499             }
 500         } else if (ch0 == '#') {
 501             assert scripting;
 502             // shell style comment
 503             // Skip over #.
 504             skip(1);
 505             // Scan for EOL.
 506             while (!atEOF() && !isEOL(ch0)) {
 507                 skip(1);
 508             }
 509             // Did detect a comment.
 510             add(COMMENT, start);
 511             return true;
 512         }
 513 
 514         // Not a comment.
 515         return false;
 516     }
 517 
 518     /**
 519      * Convert a regex token to a token object.
 520      *
 521      * @param start  Position in source content.
 522      * @param length Length of regex token.
 523      * @return Regex token object.
 524      */
 525     public RegexToken valueOfPattern(final int start, final int length) {
 526         // Save the current position.
 527         final int savePosition = position;
 528         // Reset to beginning of content.
 529         reset(start);
 530         // Buffer for recording characters.
 531         final StringBuilder sb = new StringBuilder(length);
 532 
 533         // Skip /.
 534         skip(1);
 535         boolean inBrackets = false;
 536         // Scan for closing /, stopping at end of line.
 537         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
 538             // Skip over escaped character.
 539             if (ch0 == '\\') {
 540                 sb.append(ch0);
 541                 sb.append(ch1);
 542                 skip(2);
 543             } else {
 544                 if (ch0 == '[') {
 545                     inBrackets = true;
 546                 } else if (ch0 == ']') {
 547                     inBrackets = false;
 548                 }
 549 
 550                 // Skip literal character.
 551                 sb.append(ch0);
 552                 skip(1);
 553             }
 554         }
 555 
 556         // Get pattern as string.
 557         final String regex = sb.toString();
 558 
 559         // Skip /.
 560         skip(1);
 561 
 562         // Options as string.
 563         final String options = source.getString(position, scanIdentifier());
 564 
 565         reset(savePosition);
 566 
 567         // Compile the pattern.
 568         return new RegexToken(regex, options);
 569     }
 570 
 571     /**
 572      * Return true if the given token can be the beginning of a literal.
 573      *
 574      * @param token a token
 575      * @return true if token can start a literal.
 576      */
 577     public boolean canStartLiteral(final TokenType token) {
 578         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
 579     }
 580 
 581     /**
 582      * interface to receive line information for multi-line literals.
 583      */
 584     protected interface LineInfoReceiver {
 585         /**
 586          * Receives line information
 587          * @param line last line number
 588          * @param linePosition position of last line
 589          */
 590         public void lineInfo(int line, int linePosition);
 591     }
 592 
 593     /**
 594      * Check whether the given token represents the beginning of a literal. If so scan
 595      * the literal and return <tt>true</tt>, otherwise return false.
 596      *
 597      * @param token the token.
 598      * @param startTokenType the token type.
 599      * @param lir LineInfoReceiver that receives line info for multi-line string literals.
 600      * @return True if a literal beginning with startToken was found and scanned.
 601      */
 602     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
 603         // Check if it can be a literal.
 604         if (!canStartLiteral(startTokenType)) {
 605             return false;
 606         }
 607         // We break on ambiguous tokens so if we already moved on it can't be a literal.
 608         if (stream.get(stream.last()) != token) {
 609             return false;
 610         }
 611 
 612         // Record current position in case multiple heredocs start on this line - see JDK-8073653
 613         final State state = saveState();
 614         // Rewind to token start position
 615         reset(Token.descPosition(token));
 616 
 617         if (ch0 == '/') {
 618             return scanRegEx();
 619         } else if (ch0 == '<') {
 620             if (ch1 == '<') {
 621                 return scanHereString(lir, state);
 622             } else if (Character.isJavaIdentifierStart(ch1)) {
 623                 return scanXMLLiteral();
 624             }
 625         }
 626 
 627         return false;
 628     }
 629 
 630     /**
 631      * Scan over regex literal.
 632      *
 633      * @return True if a regex literal.
 634      */
 635     private boolean scanRegEx() {
 636         assert ch0 == '/';
 637         // Make sure it's not a comment.
 638         if (ch1 != '/' && ch1 != '*') {
 639             // Record beginning of literal.
 640             final int start = position;
 641             // Skip /.
 642             skip(1);
 643             boolean inBrackets = false;
 644 
 645             // Scan for closing /, stopping at end of line.
 646             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
 647                 // Skip over escaped character.
 648                 if (ch0 == '\\') {
 649                     skip(1);
 650                     if (isEOL(ch0)) {
 651                         reset(start);
 652                         return false;
 653                     }
 654                     skip(1);
 655                 } else {
 656                     if (ch0 == '[') {
 657                         inBrackets = true;
 658                     } else if (ch0 == ']') {
 659                         inBrackets = false;
 660                     }
 661 
 662                     // Skip literal character.
 663                     skip(1);
 664                 }
 665             }
 666 
 667             // If regex literal.
 668             if (ch0 == '/') {
 669                 // Skip /.
 670                 skip(1);
 671 
 672                 // Skip over options.
 673                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
 674                     skip(1);
 675                 }
 676 
 677                 // Add regex token.
 678                 add(REGEX, start);
 679                 // Regex literal detected.
 680                 return true;
 681             }
 682 
 683             // False start try again.
 684             reset(start);
 685         }
 686 
 687         // Regex literal not detected.
 688         return false;
 689     }
 690 
 691     /**
     * Convert a digit to an integer.  Can't use Character.digit since we are
 693      * restricted to ASCII by the spec.
 694      *
 695      * @param ch   Character to convert.
 696      * @param base Numeric base.
 697      *
 698      * @return The converted digit or -1 if invalid.
 699      */
 700     protected static int convertDigit(final char ch, final int base) {
 701         int digit;
 702 
 703         if ('0' <= ch && ch <= '9') {
 704             digit = ch - '0';
 705         } else if ('A' <= ch && ch <= 'Z') {
 706             digit = ch - 'A' + 10;
 707         } else if ('a' <= ch && ch <= 'z') {
 708             digit = ch - 'a' + 10;
 709         } else {
 710             return -1;
 711         }
 712 
 713         return digit < base ? digit : -1;
 714     }
 715 
 716 
 717     /**
 718      * Get the value of a hexadecimal numeric sequence.
 719      *
 720      * @param length Number of digits.
 721      * @param type   Type of token to report against.
 722      * @return Value of sequence or < 0 if no digits.
 723      */
 724     private int hexSequence(final int length, final TokenType type) {
 725         int value = 0;
 726 
 727         for (int i = 0; i < length; i++) {
 728             final int digit = convertDigit(ch0, 16);
 729 
 730             if (digit == -1) {
 731                 error(Lexer.message("invalid.hex"), type, position, limit);
 732                 return i == 0 ? -1 : value;
 733             }
 734 
 735             value = digit | value << 4;
 736             skip(1);
 737         }
 738 
 739         return value;
 740     }
 741 
 742     /**
 743      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
 744      *
 745      * @return Value of sequence.
 746      */
 747     private int octalSequence() {
 748         int value = 0;
 749 
 750         for (int i = 0; i < 3; i++) {
 751             final int digit = convertDigit(ch0, 8);
 752 
 753             if (digit == -1) {
 754                 break;
 755             }
 756             value = digit | value << 3;
 757             skip(1);
 758 
 759             if (i == 1 && value >= 32) {
 760                 break;
 761             }
 762         }
 763         return value;
 764     }
 765 
 766     /**
 767      * Convert a string to a JavaScript identifier.
 768      *
 769      * @param start  Position in source content.
 770      * @param length Length of token.
     * @return Ident string, or null if a unicode escape in it decodes to a whitespace character.
 772      */
 773     private String valueOfIdent(final int start, final int length) throws RuntimeException {
 774         // Save the current position.
 775         final int savePosition = position;
 776         // End of scan.
 777         final int end = start + length;
 778         // Reset to beginning of content.
 779         reset(start);
 780         // Buffer for recording characters.
 781         final StringBuilder sb = new StringBuilder(length);
 782 
 783         // Scan until end of line or end of file.
 784         while (!atEOF() && position < end && !isEOL(ch0)) {
 785             // If escape character.
 786             if (ch0 == '\\' && ch1 == 'u') {
 787                 skip(2);
 788                 final int ch = hexSequence(4, TokenType.IDENT);
 789                 if (isWhitespace((char)ch)) {
 790                     return null;
 791                 }
 792                 if (ch < 0) {
 793                     sb.append('\\');
 794                     sb.append('u');
 795                 } else {
 796                     sb.append((char)ch);
 797                 }
 798             } else {
 799                 // Add regular character.
 800                 sb.append(ch0);
 801                 skip(1);
 802             }
 803         }
 804 
 805         // Restore position.
 806         reset(savePosition);
 807 
 808         return sb.toString();
 809     }
 810 
 811     /**
     * Scan over an identifier or keyword. Handles identifiers containing
 813      * encoded Unicode chars.
 814      *
 815      * Example:
 816      *
 817      * var \u0042 = 44;
 818      */
 819     private void scanIdentifierOrKeyword() {
 820         // Record beginning of identifier.
 821         final int start = position;
 822         // Scan identifier.
 823         final int length = scanIdentifier();
 824         // Check to see if it is a keyword.
 825         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
 826         if (type == FUNCTION && pauseOnFunctionBody) {
 827             pauseOnNextLeftBrace = true;
 828         }
 829         // Add keyword or identifier token.
 830         add(type, start);
 831     }
 832 
    /**
     * Convert the source text of a string literal to its string value,
     * resolving backslash escape sequences along the way.
     *
     * The current lexer position is saved on entry and restored before
     * returning, so the caller's scan position is not disturbed.
     *
     * @param start  Position in source content.
     * @param length Length of token.
     * @param strict True to report octal escapes (other than a lone backslash-zero) as errors.
     * @return JavaScript string object.
     */
    private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
        // Save the current position.
        final int savePosition = position;
        // Calculate the end position.
        final int end = start + length;
        // Reset to beginning of string.
        reset(start);

        // Buffer for recording characters.
        final StringBuilder sb = new StringBuilder(length);

        // Scan until end of string.
        while (position < end) {
            // If escape character.
            if (ch0 == '\\') {
                skip(1);

                // Remember the escaped character and where it sits, then step
                // past it so ch0 is the character following the escape.
                final char next = ch0;
                final int afterSlash = position;

                skip(1);

                // Special characters.
                switch (next) {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7': {
                    if (strict) {
                        // "\0" itself is allowed in strict mode. Only other 'real'
                        // octal escape sequences are not allowed (eg. "\02", "\31").
                        // See section 7.8.4 String literals production EscapeSequence
                        if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
                            error(Lexer.message("strict.no.octal"), STRING, position, limit);
                        }
                    }
                    // Rewind to the first octal digit so octalSequence() sees
                    // the whole sequence.
                    reset(afterSlash);
                    // Octal sequence.
                    final int ch = octalSequence();

                    // NOTE(review): on a negative result this emits backslash + 'x',
                    // the same text as the hex case below - confirm this is intended.
                    if (ch < 0) {
                        sb.append('\\');
                        sb.append('x');
                    } else {
                        sb.append((char)ch);
                    }
                    break;
                }
                case 'n':
                    sb.append('\n');
                    break;
                case 't':
                    sb.append('\t');
                    break;
                case 'b':
                    sb.append('\b');
                    break;
                case 'f':
                    sb.append('\f');
                    break;
                case 'r':
                    sb.append('\r');
                    break;
                case '\'':
                    sb.append('\'');
                    break;
                case '\"':
                    sb.append('\"');
                    break;
                case '\\':
                    sb.append('\\');
                    break;
                case '\r': // CR | CRLF
                    // Swallow the LF of a CRLF pair as part of the continuation.
                    if (ch0 == '\n') {
                        skip(1);
                    }
                    // fall through
                case '\n': // LF
                case '\u2028': // LS
                case '\u2029': // PS
                    // continue on the next line, slash-return continues string
                    // literal (nothing is appended to the value)
                    break;
                case 'x': {
                    // Hex sequence (two hex digits).
                    final int ch = hexSequence(2, STRING);

                    // A negative result keeps the escape introducer as literal text.
                    if (ch < 0) {
                        sb.append('\\');
                        sb.append('x');
                    } else {
                        sb.append((char)ch);
                    }
                }
                    break;
                case 'u': {
                    // Unicode sequence (four hex digits).
                    final int ch = hexSequence(4, STRING);

                    // A negative result keeps the escape introducer as literal text.
                    if (ch < 0) {
                        sb.append('\\');
                        sb.append('u');
                    } else {
                        sb.append((char)ch);
                    }
                }
                    break;
                case 'v':
                    // Vertical tab.
                    sb.append('\u000B');
                    break;
                // All other characters stand for themselves.
                default:
                    sb.append(next);
                    break;
                }
            } else if (ch0 == '\r') {
                // Convert CR-LF or CR to LF line terminator.
                sb.append('\n');
                skip(ch1 == '\n' ? 2 : 1);
            } else {
                // Add regular character.
                sb.append(ch0);
                skip(1);
            }
        }

        // Restore position.
        reset(savePosition);

        return sb.toString();
    }
 975 
 976     /**
 977      * Scan over a string literal.
 978      * @param add true if we are not just scanning but should actually modify the token stream
 979      */
 980     protected void scanString(final boolean add) {
 981         // Type of string.
 982         TokenType type = STRING;
 983         // Record starting quote.
 984         final char quote = ch0;
 985         // Skip over quote.
 986         skip(1);
 987 
 988         // Record beginning of string content.
 989         final State stringState = saveState();
 990 
 991         // Scan until close quote or end of line.
 992         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
 993             // Skip over escaped character.
 994             if (ch0 == '\\') {
 995                 type = ESCSTRING;
 996                 skip(1);
 997                 if (! isEscapeCharacter(ch0)) {
 998                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);
 999                 }
1000                 if (isEOL(ch0)) {
1001                     // Multiline string literal
1002                     skipEOL(false);
1003                     continue;
1004                 }
1005             }
1006             // Skip literal character.
1007             skip(1);
1008         }
1009 
1010         // If close quote.
1011         if (ch0 == quote) {
1012             // Skip close quote.
1013             skip(1);
1014         } else {
1015             error(Lexer.message("missing.close.quote"), STRING, position, limit);
1016         }
1017 
1018         // If not just scanning.
1019         if (add) {
1020             // Record end of string.
1021             stringState.setLimit(position - 1);
1022 
1023             if (scripting && !stringState.isEmpty()) {
1024                 switch (quote) {
1025                 case '`':
1026                     // Mark the beginning of an exec string.
1027                     add(EXECSTRING, stringState.position, stringState.limit);
1028                     // Frame edit string with left brace.
1029                     add(LBRACE, stringState.position, stringState.position);
1030                     // Process edit string.
1031                     editString(type, stringState);
1032                     // Frame edit string with right brace.
1033                     add(RBRACE, stringState.limit, stringState.limit);
1034                     break;
1035                 case '"':
1036                     // Only edit double quoted strings.
1037                     editString(type, stringState);
1038                     break;
1039                 case '\'':
1040                     // Add string token without editing.
1041                     add(type, stringState.position, stringState.limit);
1042                     break;
1043                 default:
1044                     break;
1045                 }
1046             } else {
1047                 /// Add string token without editing.
1048                 add(type, stringState.position, stringState.limit);
1049             }
1050         }
1051     }
1052 
    /**
     * Scan over a template string literal, starting at its opening backquote.
     *
     * A template without substitutions is emitted as a single TEMPLATE token.
     * Otherwise the text before the first substitution is emitted as
     * TEMPLATE_HEAD, text between substitutions as TEMPLATE_MIDDLE and the
     * text after the last substitution as TEMPLATE_TAIL. Each substitution
     * expression is tokenized by a nested lexer. Reaching end of input before
     * the closing backquote is reported as a missing close quote.
     */
    private void scanTemplate() {
        assert ch0 == '`';
        // Stays TEMPLATE until the first substitution has been seen.
        TokenType type = TEMPLATE;

        // Skip over quote and record beginning of string content.
        skip(1);
        State stringState = saveState();

        // Scan until close quote
        while (!atEOF()) {
            // Closing backquote ends the template.
            if (ch0 == '`') {
                skip(1);
                // Record end of string.
                stringState.setLimit(position - 1);
                // After at least one substitution the final piece is a tail.
                add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit);
                return;
            } else if (ch0 == '$' && ch1 == '{') {
                // Start of a substitution: emit the text collected so far,
                // excluding the two introducer characters.
                skip(2);
                stringState.setLimit(position - 2);
                add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit);

                // scan to RBRACE
                // The nested lexer starts with one open brace counted and
                // stops when the matching right brace is reached.
                final Lexer expressionLexer = new Lexer(this, saveState());
                expressionLexer.templateExpressionOpenBraces = 1;
                expressionLexer.lexify();
                // Continue from wherever the nested lexer stopped.
                restoreState(expressionLexer.saveState());

                // scan next middle or tail of the template literal
                assert ch0 == '}';
                type = TEMPLATE_MIDDLE;

                // Skip over rbrace and record beginning of string content.
                skip(1);
                stringState = saveState();

                continue;
            } else if (ch0 == '\\') {
                skip(1);
                // EscapeSequence
                if (!isEscapeCharacter(ch0)) {
                    error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit);
                }
                if (isEOL(ch0)) {
                    // LineContinuation
                    skipEOL(false);
                    continue;
                }
            }  else if (isEOL(ch0)) {
                // LineTerminatorSequence
                skipEOL(false);
                continue;
            }

            // Skip literal character.
            skip(1);
        }

        // Ran out of input before the closing backquote.
        error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
    }
1116 
    /**
     * Is the given character a valid escape char after "\" ?
     *
     * This implementation accepts every character. The method is protected,
     * presumably so that subclasses can be stricter - confirm against the
     * subclasses before relying on that.
     *
     * @param ch character to be checked
     * @return if the given character is valid after "\"
     */
    protected boolean isEscapeCharacter(final char ch) {
        return true;
    }
1126 
1127     /**
1128      * Convert string to number.
1129      *
1130      * @param valueString  String to convert.
1131      * @param radix        Numeric base.
1132      * @return Converted number.
1133      */
1134     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1135         try {
1136             return Integer.parseInt(valueString, radix);
1137         } catch (final NumberFormatException e) {
1138             if (radix == 10) {
1139                 return Double.valueOf(valueString);
1140             }
1141 
1142             double value = 0.0;
1143 
1144             for (int i = 0; i < valueString.length(); i++) {
1145                 final char ch = valueString.charAt(i);
1146                 // Preverified, should always be a valid digit.
1147                 final int digit = convertDigit(ch, radix);
1148                 value *= radix;
1149                 value += digit;
1150             }
1151 
1152             return value;
1153         }
1154     }
1155 
1156     /**
1157      * Scan a number.
1158      */
1159     protected void scanNumber() {
1160         // Record beginning of number.
1161         final int start = position;
1162         // Assume value is a decimal.
1163         TokenType type = DECIMAL;
1164 
1165         // First digit of number.
1166         int digit = convertDigit(ch0, 10);
1167 
1168         // If number begins with 0x.
1169         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1170             // Skip over 0xN.
1171             skip(3);
1172             // Skip over remaining digits.
1173             while (convertDigit(ch0, 16) != -1) {
1174                 skip(1);
1175             }
1176 
1177             type = HEXADECIMAL;
1178         } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) {
1179             // Skip over 0oN.
1180             skip(3);
1181             // Skip over remaining digits.
1182             while (convertDigit(ch0, 8) != -1) {
1183                 skip(1);
1184             }
1185 
1186             type = OCTAL;
1187         } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) {
1188             // Skip over 0bN.
1189             skip(3);
1190             // Skip over remaining digits.
1191             while (convertDigit(ch0, 2) != -1) {
1192                 skip(1);
1193             }
1194 
1195             type = BINARY_NUMBER;
1196         } else {
1197             // Check for possible octal constant.
1198             boolean octal = digit == 0;
1199             // Skip first digit if not leading '.'.
1200             if (digit != -1) {
1201                 skip(1);
1202             }
1203 
1204             // Skip remaining digits.
1205             while ((digit = convertDigit(ch0, 10)) != -1) {
1206                 // Check octal only digits.
1207                 octal = octal && digit < 8;
1208                 // Skip digit.
1209                 skip(1);
1210             }
1211 
1212             if (octal && position - start > 1) {
1213                 type = OCTAL_LEGACY;
1214             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1215                 // Must be a double.
1216                 if (ch0 == '.') {
1217                     // Skip period.
1218                     skip(1);
1219                     // Skip mantissa.
1220                     while (convertDigit(ch0, 10) != -1) {
1221                         skip(1);
1222                     }
1223                 }
1224 
1225                 // Detect exponent.
1226                 if (ch0 == 'E' || ch0 == 'e') {
1227                     // Skip E.
1228                     skip(1);
1229                     // Detect and skip exponent sign.
1230                     if (ch0 == '+' || ch0 == '-') {
1231                         skip(1);
1232                     }
1233                     // Skip exponent.
1234                     while (convertDigit(ch0, 10) != -1) {
1235                         skip(1);
1236                     }
1237                 }
1238 
1239                 type = FLOATING;
1240             }
1241         }
1242 
1243         if (Character.isJavaIdentifierStart(ch0)) {
1244             error(Lexer.message("missing.space.after.number"), type, position, 1);
1245         }
1246 
1247         // Add number token.
1248         add(type, start);
1249     }
1250 
1251     /**
1252      * Convert a regex token to a token object.
1253      *
1254      * @param start  Position in source content.
1255      * @param length Length of regex token.
1256      * @return Regex token object.
1257      */
1258     XMLToken valueOfXML(final int start, final int length) {
1259         return new XMLToken(source.getString(start, length));
1260     }
1261 
1262     /**
1263      * Scan over a XML token.
1264      *
1265      * @return TRUE if is an XML literal.
1266      */
1267     private boolean scanXMLLiteral() {
1268         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1269         if (XML_LITERALS) {
1270             // Record beginning of xml expression.
1271             final int start = position;
1272 
1273             int openCount = 0;
1274 
1275             do {
1276                 if (ch0 == '<') {
1277                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1278                         skip(3);
1279                         openCount--;
1280                     } else if (Character.isJavaIdentifierStart(ch1)) {
1281                         skip(2);
1282                         openCount++;
1283                     } else if (ch1 == '?') {
1284                         skip(2);
1285                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1286                         skip(4);
1287                     } else {
1288                         reset(start);
1289                         return false;
1290                     }
1291 
1292                     while (!atEOF() && ch0 != '>') {
1293                         if (ch0 == '/' && ch1 == '>') {
1294                             openCount--;
1295                             skip(1);
1296                             break;
1297                         } else if (ch0 == '\"' || ch0 == '\'') {
1298                             scanString(false);
1299                         } else {
1300                             skip(1);
1301                         }
1302                     }
1303 
1304                     if (ch0 != '>') {
1305                         reset(start);
1306                         return false;
1307                     }
1308 
1309                     skip(1);
1310                 } else if (atEOF()) {
1311                     reset(start);
1312                     return false;
1313                 } else {
1314                     skip(1);
1315                 }
1316             } while (openCount > 0);
1317 
1318             add(XML, start);
1319             return true;
1320         }
1321 
1322         return false;
1323     }
1324 
1325     /**
1326      * Scan over identifier characters.
1327      *
1328      * @return Length of identifier or zero if none found.
1329      */
1330     private int scanIdentifier() {
1331         final int start = position;
1332 
1333         // Make sure first character is valid start character.
1334         if (ch0 == '\\' && ch1 == 'u') {
1335             skip(2);
1336             final int ch = hexSequence(4, TokenType.IDENT);
1337 
1338             if (!Character.isJavaIdentifierStart(ch)) {
1339                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1340             }
1341         } else if (!Character.isJavaIdentifierStart(ch0)) {
1342             // Not an identifier.
1343             return 0;
1344         }
1345 
1346         // Make sure remaining characters are valid part characters.
1347         while (!atEOF()) {
1348             if (ch0 == '\\' && ch1 == 'u') {
1349                 skip(2);
1350                 final int ch = hexSequence(4, TokenType.IDENT);
1351 
1352                 if (!Character.isJavaIdentifierPart(ch)) {
1353                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1354                 }
1355             } else if (Character.isJavaIdentifierPart(ch0)) {
1356                 skip(1);
1357             } else {
1358                 break;
1359             }
1360         }
1361 
1362         // Length of identifier sequence.
1363         return position - start;
1364     }
1365 
1366     /**
1367      * Compare two identifiers (in content) for equality.
1368      *
1369      * @param aStart  Start of first identifier.
1370      * @param aLength Length of first identifier.
1371      * @param bStart  Start of second identifier.
1372      * @param bLength Length of second identifier.
1373      * @return True if equal.
1374      */
1375     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1376         if (aLength == bLength) {
1377             for (int i = 0; i < aLength; i++) {
1378                 if (content[aStart + i] != content[bStart + i]) {
1379                     return false;
1380                 }
1381             }
1382 
1383             return true;
1384         }
1385 
1386         return false;
1387     }
1388 
1389     /**
1390      * Detect if a line starts with a marker identifier.
1391      *
1392      * @param identStart  Start of identifier.
1393      * @param identLength Length of identifier.
1394      * @return True if detected.
1395      */
1396     private boolean hasHereMarker(final int identStart, final int identLength) {
1397         // Skip any whitespace.
1398         skipWhitespace(false);
1399 
1400         return identifierEqual(identStart, identLength, position, scanIdentifier());
1401     }
1402 
1403     /**
1404      * Lexer to service edit strings.
1405      */
1406     private static class EditStringLexer extends Lexer {
1407         /** Type of string literals to emit. */
1408         final TokenType stringType;
1409 
1410         /*
1411          * Constructor.
1412          */
1413 
1414         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1415             super(lexer, stringState);
1416 
1417             this.stringType = stringType;
1418         }
1419 
1420         /**
1421          * Lexify the contents of the string.
1422          */
1423         @Override
1424         public void lexify() {
1425             // Record start of string position.
1426             int stringStart = position;
1427             // Indicate that the priming first string has not been emitted.
1428             boolean primed = false;
1429 
1430             while (true) {
1431                 // Detect end of content.
1432                 if (atEOF()) {
1433                     break;
1434                 }
1435 
1436                 // Honour escapes (should be well formed.)
1437                 if (ch0 == '\\' && stringType == ESCSTRING) {
1438                     skip(2);
1439 
1440                     continue;
1441                 }
1442 
1443                 // If start of expression.
1444                 if (ch0 == '$' && ch1 == '{') {
1445                     if (!primed || stringStart != position) {
1446                         if (primed) {
1447                             add(ADD, stringStart, stringStart + 1);
1448                         }
1449 
1450                         add(stringType, stringStart, position);
1451                         primed = true;
1452                     }
1453 
1454                     // Skip ${
1455                     skip(2);
1456 
1457                     // Save expression state.
1458                     final State expressionState = saveState();
1459 
1460                     // Start with one open brace.
1461                     int braceCount = 1;
1462 
1463                     // Scan for the rest of the string.
1464                     while (!atEOF()) {
1465                         // If closing brace.
1466                         if (ch0 == '}') {
1467                             // Break only only if matching brace.
1468                             if (--braceCount == 0) {
1469                                 break;
1470                             }
1471                         } else if (ch0 == '{') {
1472                             // Bump up the brace count.
1473                             braceCount++;
1474                         }
1475 
1476                         // Skip to next character.
1477                         skip(1);
1478                     }
1479 
1480                     // If braces don't match then report an error.
1481                     if (braceCount != 0) {
1482                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1483                     }
1484 
1485                     // Mark end of expression.
1486                     expressionState.setLimit(position);
1487                     // Skip closing brace.
1488                     skip(1);
1489 
1490                     // Start next string.
1491                     stringStart = position;
1492 
1493                     // Concatenate expression.
1494                     add(ADD, expressionState.position, expressionState.position + 1);
1495                     add(LPAREN, expressionState.position, expressionState.position + 1);
1496 
1497                     // Scan expression.
1498                     final Lexer lexer = new Lexer(this, expressionState);
1499                     lexer.lexify();
1500 
1501                     // Close out expression parenthesis.
1502                     add(RPAREN, position - 1, position);
1503 
1504                     continue;
1505                 }
1506 
1507                 // Next character in string.
1508                 skip(1);
1509             }
1510 
1511             // If there is any unemitted string portion.
1512             if (stringStart != limit) {
1513                 // Concatenate remaining string.
1514                 if (primed) {
1515                     add(ADD, stringStart, 1);
1516                 }
1517 
1518                 add(stringType, stringStart, limit);
1519             }
1520         }
1521 
1522     }
1523 
1524     /**
1525      * Edit string for nested expressions.
1526      *
1527      * @param stringType  Type of string literals to emit.
1528      * @param stringState State of lexer at start of string.
1529      */
1530     private void editString(final TokenType stringType, final State stringState) {
1531         // Use special lexer to scan string.
1532         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1533         lexer.lexify();
1534 
1535         // Need to keep lexer informed.
1536         last = stringType;
1537     }
1538 
    /**
     * Scan over a here string. Only active in scripting mode.
     *
     * The introducer is two or three less-than signs followed by a marker
     * identifier, which may be quoted to suppress string editing. The string
     * body starts on the following line and ends at the first line that
     * begins (after whitespace) with the marker. Once the string token has
     * been emitted, the remainder of the introducing line is tokenized by a
     * nested lexer.
     *
     * @param lir      Receiver that is told the line number and line start
     *                 position reached while hunting for the end marker.
     * @param oldState Lexer state compared against the end of the introducing
     *                 line; if it lies beyond that point, the string body is
     *                 taken to start after the line containing that state.
     *                 NOTE(review): presumably where a previous here string on
     *                 the same line ended - confirm with the caller.
     * @return TRUE if is a here string.
     */
    private boolean scanHereString(final LineInfoReceiver lir, final State oldState) {
        assert ch0 == '<' && ch1 == '<';
        if (scripting) {
            // Record beginning of here string.
            final State saved = saveState();

            // << or <<<
            // The two-character form drops the final end of line of the string.
            final boolean excludeLastEOL = ch2 != '<';

            if (excludeLastEOL) {
                skip(2);
            } else {
                skip(3);
            }

            // Scan identifier. It might be quoted, indicating that no string editing should take place.
            final char quoteChar = ch0;
            final boolean noStringEditing = quoteChar == '"' || quoteChar == '\'';
            if (noStringEditing) {
                skip(1);
            }
            final int identStart = position;
            final int identLength = scanIdentifier();
            if (noStringEditing) {
                // A quoted marker must be closed by the same quote character.
                if (ch0 != quoteChar) {
                    error(Lexer.message("here.non.matching.delimiter"), last, position, position);
                    restoreState(saved);
                    return false;
                }
                skip(1);
            }

            // Check for identifier.
            if (identLength == 0) {
                // Treat as shift.
                restoreState(saved);

                return false;
            }

            // Record rest of line.
            final State restState = saveState();
            // keep line number updated
            int lastLine = line;

            skipLine(false);
            lastLine++;
            int lastLinePosition = position;
            restState.setLimit(position);

            // If the supplied state is already past this line, continue from
            // the end of the line that state is on instead.
            if (oldState.position > position) {
                restoreState(oldState);
                skipLine(false);
            }

            // Record beginning of string.
            final State stringState = saveState();
            int stringEnd = position;

            // Hunt down marker.
            while (!atEOF()) {
                // Skip any whitespace.
                skipWhitespace(false);

                if (hasHereMarker(identStart, identLength)) {
                    break;
                }

                // Not the marker line: it belongs to the string body.
                skipLine(false);
                lastLine++;
                lastLinePosition = position;
                stringEnd = position;
            }

            // notify last line information
            lir.lineInfo(lastLine, lastLinePosition);

            // Record end of string.
            stringState.setLimit(stringEnd);

            // If marker is missing.
            if (stringState.isEmpty() || atEOF()) {
                error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
                restoreState(saved);

                return false;
            }

            // Remove last end of line if specified.
            if (excludeLastEOL) {
                // Handles \n.
                if (content[stringEnd - 1] == '\n') {
                    stringEnd--;
                }

                // Handles \r and \r\n.
                if (content[stringEnd - 1] == '\r') {
                    stringEnd--;
                }

                // Update end of string.
                stringState.setLimit(stringEnd);
            }

            // Edit string if appropriate.
            if (!noStringEditing && !stringState.isEmpty()) {
                editString(STRING, stringState);
            } else {
                // Add here string.
                add(STRING, stringState.position, stringState.limit);
            }

            // Scan rest of original line.
            final Lexer restLexer = new Lexer(this, restState);

            restLexer.lexify();

            return true;
        }

        return false;
    }
1666 
    /**
     * Breaks source content down into lex units, adding tokens to the token
     * stream. The routine scans until the stream buffer is full. Can be called
     * repeatedly until EOF is detected.
     *
     * A nested lexer ignores the stream-full condition and emits no EOF
     * token. Scanning also stops early after an operator that may start a
     * literal, after a left brace when a pause was requested, and at the right
     * brace that closes a template expression.
     */
    public void lexify() {
        while (!stream.isFull() || nested) {
            // Skip over whitespace.
            skipWhitespace(true);

            // Detect end of file.
            if (atEOF()) {
                if (!nested) {
                    // Add an EOF token at the end.
                    add(EOF, position);
                }

                break;
            }

            // Check for comments. Note that we don't scan for regexp and other literals here as
            // we may not have enough context to distinguish them from similar looking operators.
            // Instead we break on ambiguous operators below and let the parser decide.
            if (ch0 == '/' && skipComments()) {
                continue;
            }

            // In scripting mode a '#' may also start a comment.
            if (scripting && ch0 == '#' && skipComments()) {
                continue;
            }

            // TokenType for lookup of delimiter or operator.
            TokenType type;

            // The order of the checks below matters: a '.' followed by a digit
            // must be tried as a number before the operator lookup.
            if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
                // '.' followed by digit.
                // Scan and add a number.
                scanNumber();
            } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
                // While inside a template expression, track brace nesting and
                // stop (without consuming) at the brace that closes it.
                if (templateExpressionOpenBraces > 0) {
                    if (type == LBRACE) {
                        templateExpressionOpenBraces++;
                    } else if (type == RBRACE) {
                        if (--templateExpressionOpenBraces == 0) {
                            break;
                        }
                    }
                }

                // Get the number of characters in the token.
                final int typeLength = type.getLength();
                // Skip that many characters.
                skip(typeLength);
                // Add operator token.
                add(type, position - typeLength);
                // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
                // We break to let the parser decide what it is.
                if (canStartLiteral(type)) {
                    break;
                } else if (type == LBRACE && pauseOnNextLeftBrace) {
                    // One-shot pause request: clear it and stop after this brace.
                    pauseOnNextLeftBrace = false;
                    break;
                }
            } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
                // Scan and add identifier or keyword.
                scanIdentifierOrKeyword();
            } else if (isStringDelimiter(ch0)) {
                // Scan and add a string.
                scanString(true);
            } else if (Character.isDigit(ch0)) {
                // Scan and add a number.
                scanNumber();
            } else if (isTemplateDelimiter(ch0) && es6) {
                // Scan and add template in ES6 mode.
                scanTemplate();
            } else if (isTemplateDelimiter(ch0) && scripting) {
                // Scan and add an exec string ('`') in scripting mode.
                scanString(true);
            } else {
                // Don't recognize this character.
                skip(1);
                add(ERROR, position - 1);
            }
        }
    }
1752 
1753     /**
1754      * Return value of token given its token descriptor.
1755      *
1756      * @param token  Token descriptor.
1757      * @return JavaScript value.
1758      */
1759     Object getValueOf(final long token, final boolean strict) {
1760         final int start = Token.descPosition(token);
1761         final int len   = Token.descLength(token);
1762 
1763         switch (Token.descType(token)) {
1764         case DECIMAL:
1765             return Lexer.valueOf(source.getString(start, len), 10); // number
1766         case HEXADECIMAL:
1767             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1768         case OCTAL_LEGACY:
1769             return Lexer.valueOf(source.getString(start, len), 8); // number
1770         case OCTAL:
1771             return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number
1772         case BINARY_NUMBER:
1773             return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number
1774         case FLOATING:
1775             final String str   = source.getString(start, len);
1776             final double value = Double.valueOf(str);
1777             if (str.indexOf('.') != -1) {
1778                 return value; //number
1779             }
1780             //anything without an explicit decimal point is still subject to a
1781             //"representable as int or long" check. Then the programmer does not
1782             //explicitly code something as a double. For example new Color(int, int, int)
1783             //and new Color(float, float, float) will get ambiguous for cases like
1784             //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1785             //yet we don't want e.g. 1e6 to be a double unnecessarily
1786             if (JSType.isStrictlyRepresentableAsInt(value)) {
1787                 return (int)value;
1788             }
1789             return value;
1790         case STRING:
1791             return source.getString(start, len); // String
1792         case ESCSTRING:
1793             return valueOfString(start, len, strict); // String
1794         case IDENT:
1795             return valueOfIdent(start, len); // String
1796         case REGEX:
1797             return valueOfPattern(start, len); // RegexToken::LexerToken
1798         case TEMPLATE:
1799         case TEMPLATE_HEAD:
1800         case TEMPLATE_MIDDLE:
1801         case TEMPLATE_TAIL:
1802             return valueOfString(start, len, true); // String
1803         case XML:
1804             return valueOfXML(start, len); // XMLToken::LexerToken
1805         case DIRECTIVE_COMMENT:
1806             return source.getString(start, len);
1807         default:
1808             break;
1809         }
1810 
1811         return null;
1812     }
1813 
1814     /**
1815      * Get the raw string value of a template literal string part.
1816      *
1817      * @param token template string token
1818      * @return raw string
1819      */
1820     public String valueOfRawString(final long token) {
1821         final int start  = Token.descPosition(token);
1822         final int length = Token.descLength(token);
1823 
1824         // Save the current position.
1825         final int savePosition = position;
1826         // Calculate the end position.
1827         final int end = start + length;
1828         // Reset to beginning of string.
1829         reset(start);
1830 
1831         // Buffer for recording characters.
1832         final StringBuilder sb = new StringBuilder(length);
1833 
1834         // Scan until end of string.
1835         while (position < end) {
1836             if (ch0 == '\r') {
1837                 // Convert CR-LF or CR to LF line terminator.
1838                 sb.append('\n');
1839                 skip(ch1 == '\n' ? 2 : 1);
1840             } else {
1841                 // Add regular character.
1842                 sb.append(ch0);
1843                 skip(1);
1844             }
1845         }
1846 
1847         // Restore position.
1848         reset(savePosition);
1849 
1850         return sb.toString();
1851     }
1852 
1853     /**
1854      * Get the correctly localized error message for a given message id format arguments
1855      * @param msgId message id
1856      * @param args  format arguments
1857      * @return message
1858      */
1859     protected static String message(final String msgId, final String... args) {
1860         return ECMAErrors.getMessage("lexer.error." + msgId, args);
1861     }
1862 
1863     /**
1864      * Generate a runtime exception
1865      *
1866      * @param message       error message
1867      * @param type          token type
1868      * @param start         start position of lexed error
1869      * @param length        length of lexed error
1870      * @throws ParserException  unconditionally
1871      */
1872     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1873         final long token     = Token.toDesc(type, start, length);
1874         final int  pos       = Token.descPosition(token);
1875         final int  lineNum   = source.getLine(pos);
1876         final int  columnNum = source.getColumn(pos);
1877         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1878         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1879     }
1880 
1881     /**
1882      * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1883      * This is the abstract superclass
1884      */
1885     public static abstract class LexerToken implements Serializable {
1886         private static final long serialVersionUID = 1L;
1887 
1888         private final String expression;
1889 
1890         /**
1891          * Constructor
1892          * @param expression token expression
1893          */
1894         protected LexerToken(final String expression) {
1895             this.expression = expression;
1896         }
1897 
1898         /**
1899          * Get the expression
1900          * @return expression
1901          */
1902         public String getExpression() {
1903             return expression;
1904         }
1905     }
1906 
1907     /**
1908      * Temporary container for regular expressions.
1909      */
1910     public static class RegexToken extends LexerToken {
1911         private static final long serialVersionUID = 1L;
1912 
1913         /** Options. */
1914         private final String options;
1915 
1916         /**
1917          * Constructor.
1918          *
1919          * @param expression  regexp expression
1920          * @param options     regexp options
1921          */
1922         public RegexToken(final String expression, final String options) {
1923             super(expression);
1924             this.options = options;
1925         }
1926 
1927         /**
1928          * Get regexp options
1929          * @return options
1930          */
1931         public String getOptions() {
1932             return options;
1933         }
1934 
1935         @Override
1936         public String toString() {
1937             return '/' + getExpression() + '/' + options;
1938         }
1939     }
1940 
    /**
     * Temporary container for XML expression.
     * Adds no state of its own; the expression text is stored by the
     * {@code LexerToken} superclass.
     */
    public static class XMLToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /**
         * Constructor. Passes the expression straight to the superclass.
         *
         * @param expression  XML expression
         */
        public XMLToken(final String expression) {
            super(expression);
        }
    }
1956 }