1 /*
   2  * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.ADD;
  29 import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER;
  30 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
  31 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
  32 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
  33 import static jdk.nashorn.internal.parser.TokenType.EOF;
  34 import static jdk.nashorn.internal.parser.TokenType.EOL;
  35 import static jdk.nashorn.internal.parser.TokenType.ERROR;
  36 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  37 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
  38 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
  39 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
  40 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
  41 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
  42 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
  43 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
  44 import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
  45 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  46 import static jdk.nashorn.internal.parser.TokenType.REGEX;
  47 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
  48 import static jdk.nashorn.internal.parser.TokenType.STRING;
  49 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
  50 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
  51 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
  52 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
  53 import static jdk.nashorn.internal.parser.TokenType.XML;
  54 
  55 import java.io.Serializable;
  56 
  57 import jdk.nashorn.internal.runtime.ECMAErrors;
  58 import jdk.nashorn.internal.runtime.ErrorManager;
  59 import jdk.nashorn.internal.runtime.JSErrorType;
  60 import jdk.nashorn.internal.runtime.JSType;
  61 import jdk.nashorn.internal.runtime.ParserException;
  62 import jdk.nashorn.internal.runtime.Source;
  63 import jdk.nashorn.internal.runtime.options.Options;
  64 
  65 /**
  66  * Responsible for converting source content into a stream of tokens.
  67  *
  68  */
  69 @SuppressWarnings("fallthrough")
  70 public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} widened to {@code long}. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} widened to {@code long}. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /**
     * Value of the boolean option {@code nashorn.lexer.xmlliterals}. When set, a token
     * starting with '&lt;' may begin a literal even outside scripting mode
     * (see {@link #canStartLiteral(TokenType)}).
     */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");
  75 
    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens; every token produced by {@code add} is put here. */
    private final TokenStream stream;

    /** True if here and edit strings are supported. */
    private final boolean scripting;

    /** True if parsing in ECMAScript 6 mode. */
    private final boolean es6;

    /** True if a nested scan. (scan to completion, no EOF.) */
    private final boolean nested;

    /**
     * Pending new line number and position. Holds the line number of an EOL that has been
     * seen but not yet written to the stream, or -1 once it has been written.
     */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** True if the lexer should pause when it reaches a function body (set from the constructor). */
    private final boolean pauseOnFunctionBody;
    /** Set when a {@code function} keyword has been scanned while {@link #pauseOnFunctionBody} is on. */
    private boolean pauseOnNextLeftBrace;

    // NOTE(review): presumably counts unclosed '{' inside a template literal expression;
    // its use lies outside this part of the file - confirm.
    private int templateExpressionOpenBraces;
 104 
    private static final String SPACETAB = " \t";  // ASCII space and tab
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    /** Every character treated as a JavaScript line terminator (see {@link #isJSEOL(char)}). */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /**
     * Every character treated as JavaScript whitespace, line terminators included
     * (see {@link #isJSWhitespace(char)}).
     */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // line tabulation (vertical tab)
        "\u000c" + // ff (ctrl-l)
        "\u00a0" + // Latin-1 space
        "\u1680" + // Ogham space mark
        "\u180e" + // separator, Mongolian vowel
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;
 137 
    /**
     * The same characters as {@link #JAVASCRIPT_WHITESPACE}, but spelled out as textual
     * escape sequences (a literal backslash, 'u' and four hex digits each) rather than as
     * the characters themselves. Returned by {@link #getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // separator, Mongolian vowel
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
 166 
 167     static String unicodeEscape(final char ch) {
 168         final StringBuilder sb = new StringBuilder();
 169 
 170         sb.append("\\u");
 171 
 172         final String hex = Integer.toHexString(ch);
 173         for (int i = hex.length(); i < 4; i++) {
 174             sb.append('0');
 175         }
 176         sb.append(hex);
 177 
 178         return sb.toString();
 179     }
 180 
    /**
     * Constructor. Lexes the whole source with both scripting mode and
     * ECMAScript 6 mode switched off.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false, false);
    }
 190 
    /**
     * Constructor. Lexes the whole source (from position 0 for
     * {@code source.getLength()} characters) without pausing on function bodies.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     * @param scripting are we in scripting mode
     * @param es6       are we in ECMAScript 6 mode
     */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) {
        this(source, 0, source.getLength(), stream, scripting, es6, false);
    }
 202 
 203     /**
 204      * Constructor
 205      *
 206      * @param source    the source
 207      * @param start     start position in source from which to start lexing
 208      * @param len       length of source segment to lex
 209      * @param stream    token stream to lex
 210      * @param scripting are we in scripting mode
 211      * @param es6       are we in ECMAScript 6 mode
 212      * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
 213      * function body. This is used with the feature where the parser is skipping nested function bodies to
 214      * avoid reading ahead unnecessarily when we skip the function bodies.
 215      */
 216     public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) {
 217         super(source.getContent(), 1, start, len);
 218         this.source      = source;
 219         this.stream      = stream;
 220         this.scripting   = scripting;
 221         this.es6         = es6;
 222         this.nested      = false;
 223         this.pendingLine = 1;
 224         this.last        = EOL;
 225 
 226         this.pauseOnFunctionBody = pauseOnFunctionBody;
 227     }
 228 
 229     private Lexer(final Lexer lexer, final State state) {
 230         super(lexer, state);
 231 
 232         source = lexer.source;
 233         stream = lexer.stream;
 234         scripting = lexer.scripting;
 235         es6 = lexer.es6;
 236         nested = true;
 237 
 238         pendingLine = state.pendingLine;
 239         linePosition = state.linePosition;
 240         last = EOL;
 241         pauseOnFunctionBody = false;
 242     }
 243 
    /**
     * Snapshot of the lexer's scan state: the scanner fields passed on to
     * {@link Scanner.State} plus the lexer's own line bookkeeping and last token type.
     */
    static class State extends Scanner.State {
        /** Pending new line number and position. */
        public final int pendingLine;

        /** Position of last EOL + 1. */
        public final int linePosition;

        /** Type of last token added. */
        public final TokenType last;

        /**
         * Constructor.
         *
         * @param position     scan position, passed to {@link Scanner.State}
         * @param limit        scan limit, passed to {@link Scanner.State}
         * @param line         line number, passed to {@link Scanner.State}
         * @param pendingLine  pending new line number
         * @param linePosition position of last EOL + 1
         * @param last         type of last token added
         */

        State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
            super(position, limit, line);

            this.pendingLine = pendingLine;
            this.linePosition = linePosition;
            this.last = last;
        }
    }
 266 
    /**
     * Save the state of the scan: position, limit and line together with the
     * lexer's pending line, line position and last token type.
     *
     * @return Captured state.
     */
    @Override
    State saveState() {
        return new State(position, limit, line, pendingLine, linePosition, last);
    }
 276 
 277     /**
 278      * Restore the state of the scan.
 279      *
 280      * @param state
 281      *            Captured state.
 282      */
 283     void restoreState(final State state) {
 284         super.restoreState(state);
 285 
 286         pendingLine = state.pendingLine;
 287         linePosition = state.linePosition;
 288         last = state.last;
 289     }
 290 
 291     /**
 292      * Add a new token to the stream.
 293      *
 294      * @param type
 295      *            Token type.
 296      * @param start
 297      *            Start position.
 298      * @param end
 299      *            End position.
 300      */
 301     protected void add(final TokenType type, final int start, final int end) {
 302         // Record last token.
 303         last = type;
 304 
 305         // Only emit the last EOL in a cluster.
 306         if (type == EOL) {
 307             pendingLine = end;
 308             linePosition = start;
 309         } else {
 310             // Write any pending EOL to stream.
 311             if (pendingLine != -1) {
 312                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
 313                 pendingLine = -1;
 314             }
 315 
 316             // Write token to stream.
 317             stream.put(Token.toDesc(type, start, end - start));
 318         }
 319     }
 320 
    /**
     * Add a new token to the stream, ending at the current scan position.
     *
     * @param type
     *            Token type.
     * @param start
     *            Start position.
     */
    protected void add(final TokenType type, final int start) {
        add(type, start, position);
    }
 332 
    /**
     * Return the String of valid whitespace characters for regular
     * expressions in JavaScript. Each character is given as a textual escape
     * sequence (backslash, 'u', four hex digits), not as the character itself.
     * @return regexp whitespace string
     */
    public static String getWhitespaceRegExp() {
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
 341 
 342     /**
 343      * Skip end of line.
 344      *
 345      * @param addEOL true if EOL token should be recorded.
 346      */
 347     private void skipEOL(final boolean addEOL) {
 348 
 349         if (ch0 == '\r') { // detect \r\n pattern
 350             skip(1);
 351             if (ch0 == '\n') {
 352                 skip(1);
 353             }
 354         } else { // all other space, ch0 is guaranteed to be EOL or \0
 355             skip(1);
 356         }
 357 
 358         // bump up line count
 359         line++;
 360 
 361         if (addEOL) {
 362             // Add an EOL token.
 363             add(EOL, position, line);
 364         }
 365     }
 366 
 367     /**
 368      * Skip over rest of line including end of line.
 369      *
 370      * @param addEOL true if EOL token should be recorded.
 371      */
 372     private void skipLine(final boolean addEOL) {
 373         // Ignore characters.
 374         while (!isEOL(ch0) && !atEOF()) {
 375             skip(1);
 376         }
 377         // Skip over end of line.
 378         skipEOL(addEOL);
 379     }
 380 
 381     /**
 382      * Test whether a char is valid JavaScript whitespace
 383      * @param ch a char
 384      * @return true if valid JavaScript whitespace
 385      */
 386     public static boolean isJSWhitespace(final char ch) {
 387         return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
 388     }
 389 
 390     /**
 391      * Test whether a char is valid JavaScript end of line
 392      * @param ch a char
 393      * @return true if valid JavaScript end of line
 394      */
 395     public static boolean isJSEOL(final char ch) {
 396         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
 397     }
 398 
 399     /**
 400      * Test if char is a string delimiter, e.g. '\' or '"'.
 401      * @param ch a char
 402      * @return true if string delimiter
 403      */
 404     protected boolean isStringDelimiter(final char ch) {
 405         return ch == '\'' || ch == '"';
 406     }
 407 
 408     /**
 409      * Test if char is a template literal delimiter ('`').
 410      */
 411     private static boolean isTemplateDelimiter(final char ch) {
 412         return ch == '`';
 413     }
 414 
 415     /**
 416      * Test whether a char is valid JavaScript whitespace
 417      * @param ch a char
 418      * @return true if valid JavaScript whitespace
 419      */
 420     protected boolean isWhitespace(final char ch) {
 421         return Lexer.isJSWhitespace(ch);
 422     }
 423 
 424     /**
 425      * Test whether a char is valid JavaScript end of line
 426      * @param ch a char
 427      * @return true if valid JavaScript end of line
 428      */
 429     protected boolean isEOL(final char ch) {
 430         return Lexer.isJSEOL(ch);
 431     }
 432 
 433     /**
 434      * Skip over whitespace and detect end of line, adding EOL tokens if
 435      * encountered.
 436      *
 437      * @param addEOL true if EOL tokens should be recorded.
 438      */
 439     private void skipWhitespace(final boolean addEOL) {
 440         while (isWhitespace(ch0)) {
 441             if (isEOL(ch0)) {
 442                 skipEOL(addEOL);
 443             } else {
 444                 skip(1);
 445             }
 446         }
 447     }
 448 
 449     /**
 450      * Skip over comments.
 451      *
 452      * @return True if a comment.
 453      */
 454     protected boolean skipComments() {
 455         // Save the current position.
 456         final int start = position;
 457 
 458         if (ch0 == '/') {
 459             // Is it a // comment.
 460             if (ch1 == '/') {
 461                 // Skip over //.
 462                 skip(2);
 463 
 464                 boolean directiveComment = false;
 465                 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
 466                     directiveComment = true;
 467                 }
 468 
 469                 // Scan for EOL.
 470                 while (!atEOF() && !isEOL(ch0)) {
 471                     skip(1);
 472                 }
 473                 // Did detect a comment.
 474                 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
 475                 return true;
 476             } else if (ch1 == '*') {
 477                 // Skip over /*.
 478                 skip(2);
 479                 // Scan for */.
 480                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
 481                     // If end of line handle else skip character.
 482                     if (isEOL(ch0)) {
 483                         skipEOL(true);
 484                     } else {
 485                         skip(1);
 486                     }
 487                 }
 488 
 489                 if (atEOF()) {
 490                     // TODO - Report closing */ missing in parser.
 491                     add(ERROR, start);
 492                 } else {
 493                     // Skip */.
 494                     skip(2);
 495                 }
 496 
 497                 // Did detect a comment.
 498                 add(COMMENT, start);
 499                 return true;
 500             }
 501         } else if (ch0 == '#') {
 502             assert scripting;
 503             // shell style comment
 504             // Skip over #.
 505             skip(1);
 506             // Scan for EOL.
 507             while (!atEOF() && !isEOL(ch0)) {
 508                 skip(1);
 509             }
 510             // Did detect a comment.
 511             add(COMMENT, start);
 512             return true;
 513         }
 514 
 515         // Not a comment.
 516         return false;
 517     }
 518 
 519     /**
 520      * Convert a regex token to a token object.
 521      *
 522      * @param start  Position in source content.
 523      * @param length Length of regex token.
 524      * @return Regex token object.
 525      */
 526     public RegexToken valueOfPattern(final int start, final int length) {
 527         // Save the current position.
 528         final int savePosition = position;
 529         // Reset to beginning of content.
 530         reset(start);
 531         // Buffer for recording characters.
 532         final StringBuilder sb = new StringBuilder(length);
 533 
 534         // Skip /.
 535         skip(1);
 536         boolean inBrackets = false;
 537         // Scan for closing /, stopping at end of line.
 538         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
 539             // Skip over escaped character.
 540             if (ch0 == '\\') {
 541                 sb.append(ch0);
 542                 sb.append(ch1);
 543                 skip(2);
 544             } else {
 545                 if (ch0 == '[') {
 546                     inBrackets = true;
 547                 } else if (ch0 == ']') {
 548                     inBrackets = false;
 549                 }
 550 
 551                 // Skip literal character.
 552                 sb.append(ch0);
 553                 skip(1);
 554             }
 555         }
 556 
 557         // Get pattern as string.
 558         final String regex = sb.toString();
 559 
 560         // Skip /.
 561         skip(1);
 562 
 563         // Options as string.
 564         final String options = source.getString(position, scanIdentifier());
 565 
 566         reset(savePosition);
 567 
 568         // Compile the pattern.
 569         return new RegexToken(regex, options);
 570     }
 571 
 572     /**
 573      * Return true if the given token can be the beginning of a literal.
 574      *
 575      * @param token a token
 576      * @return true if token can start a literal.
 577      */
 578     public boolean canStartLiteral(final TokenType token) {
 579         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
 580     }
 581 
 582     /**
 583      * interface to receive line information for multi-line literals.
 584      */
 585     protected interface LineInfoReceiver {
 586         /**
 587          * Receives line information
 588          * @param line last line number
 589          * @param linePosition position of last line
 590          */
 591         public void lineInfo(int line, int linePosition);
 592     }
 593 
 594     /**
 595      * Check whether the given token represents the beginning of a literal. If so scan
 596      * the literal and return <tt>true</tt>, otherwise return false.
 597      *
 598      * @param token the token.
 599      * @param startTokenType the token type.
 600      * @param lir LineInfoReceiver that receives line info for multi-line string literals.
 601      * @return True if a literal beginning with startToken was found and scanned.
 602      */
 603     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
 604         // Check if it can be a literal.
 605         if (!canStartLiteral(startTokenType)) {
 606             return false;
 607         }
 608         // We break on ambiguous tokens so if we already moved on it can't be a literal.
 609         if (stream.get(stream.last()) != token) {
 610             return false;
 611         }
 612 
 613         // Record current position in case multiple heredocs start on this line - see JDK-8073653
 614         final State state = saveState();
 615         // Rewind to token start position
 616         reset(Token.descPosition(token));
 617 
 618         if (ch0 == '/') {
 619             return scanRegEx();
 620         } else if (ch0 == '<') {
 621             if (ch1 == '<') {
 622                 return scanHereString(lir, state);
 623             } else if (Character.isJavaIdentifierStart(ch1)) {
 624                 return scanXMLLiteral();
 625             }
 626         }
 627 
 628         return false;
 629     }
 630 
 631     /**
 632      * Scan over regex literal.
 633      *
 634      * @return True if a regex literal.
 635      */
 636     private boolean scanRegEx() {
 637         assert ch0 == '/';
 638         // Make sure it's not a comment.
 639         if (ch1 != '/' && ch1 != '*') {
 640             // Record beginning of literal.
 641             final int start = position;
 642             // Skip /.
 643             skip(1);
 644             boolean inBrackets = false;
 645 
 646             // Scan for closing /, stopping at end of line.
 647             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
 648                 // Skip over escaped character.
 649                 if (ch0 == '\\') {
 650                     skip(1);
 651                     if (isEOL(ch0)) {
 652                         reset(start);
 653                         return false;
 654                     }
 655                     skip(1);
 656                 } else {
 657                     if (ch0 == '[') {
 658                         inBrackets = true;
 659                     } else if (ch0 == ']') {
 660                         inBrackets = false;
 661                     }
 662 
 663                     // Skip literal character.
 664                     skip(1);
 665                 }
 666             }
 667 
 668             // If regex literal.
 669             if (ch0 == '/') {
 670                 // Skip /.
 671                 skip(1);
 672 
 673                 // Skip over options.
 674                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
 675                     skip(1);
 676                 }
 677 
 678                 // Add regex token.
 679                 add(REGEX, start);
 680                 // Regex literal detected.
 681                 return true;
 682             }
 683 
 684             // False start try again.
 685             reset(start);
 686         }
 687 
 688         // Regex literal not detected.
 689         return false;
 690     }
 691 
 692     /**
 693      * Convert a digit to a integer.  Can't use Character.digit since we are
 694      * restricted to ASCII by the spec.
 695      *
 696      * @param ch   Character to convert.
 697      * @param base Numeric base.
 698      *
 699      * @return The converted digit or -1 if invalid.
 700      */
 701     protected static int convertDigit(final char ch, final int base) {
 702         int digit;
 703 
 704         if ('0' <= ch && ch <= '9') {
 705             digit = ch - '0';
 706         } else if ('A' <= ch && ch <= 'Z') {
 707             digit = ch - 'A' + 10;
 708         } else if ('a' <= ch && ch <= 'z') {
 709             digit = ch - 'a' + 10;
 710         } else {
 711             return -1;
 712         }
 713 
 714         return digit < base ? digit : -1;
 715     }
 716 
 717 
 718     /**
 719      * Get the value of a hexadecimal numeric sequence.
 720      *
 721      * @param length Number of digits.
 722      * @param type   Type of token to report against.
 723      * @return Value of sequence or < 0 if no digits.
 724      */
 725     private int hexSequence(final int length, final TokenType type) {
 726         int value = 0;
 727 
 728         for (int i = 0; i < length; i++) {
 729             final int digit = convertDigit(ch0, 16);
 730 
 731             if (digit == -1) {
 732                 error(Lexer.message("invalid.hex"), type, position, limit);
 733                 return i == 0 ? -1 : value;
 734             }
 735 
 736             value = digit | value << 4;
 737             skip(1);
 738         }
 739 
 740         return value;
 741     }
 742 
 743     /**
 744      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
 745      *
 746      * @return Value of sequence.
 747      */
 748     private int octalSequence() {
 749         int value = 0;
 750 
 751         for (int i = 0; i < 3; i++) {
 752             final int digit = convertDigit(ch0, 8);
 753 
 754             if (digit == -1) {
 755                 break;
 756             }
 757             value = digit | value << 3;
 758             skip(1);
 759 
 760             if (i == 1 && value >= 32) {
 761                 break;
 762             }
 763         }
 764         return value;
 765     }
 766 
 767     /**
 768      * Convert a string to a JavaScript identifier.
 769      *
 770      * @param start  Position in source content.
 771      * @param length Length of token.
 772      * @return Ident string or null if an error.
 773      */
 774     private String valueOfIdent(final int start, final int length) throws RuntimeException {
 775         // Save the current position.
 776         final int savePosition = position;
 777         // End of scan.
 778         final int end = start + length;
 779         // Reset to beginning of content.
 780         reset(start);
 781         // Buffer for recording characters.
 782         final StringBuilder sb = new StringBuilder(length);
 783 
 784         // Scan until end of line or end of file.
 785         while (!atEOF() && position < end && !isEOL(ch0)) {
 786             // If escape character.
 787             if (ch0 == '\\' && ch1 == 'u') {
 788                 skip(2);
 789                 final int ch = hexSequence(4, TokenType.IDENT);
 790                 if (isWhitespace((char)ch)) {
 791                     return null;
 792                 }
 793                 if (ch < 0) {
 794                     sb.append('\\');
 795                     sb.append('u');
 796                 } else {
 797                     sb.append((char)ch);
 798                 }
 799             } else {
 800                 // Add regular character.
 801                 sb.append(ch0);
 802                 skip(1);
 803             }
 804         }
 805 
 806         // Restore position.
 807         reset(savePosition);
 808 
 809         return sb.toString();
 810     }
 811 
 812     /**
 813      * Scan over and identifier or keyword. Handles identifiers containing
 814      * encoded Unicode chars.
 815      *
 816      * Example:
 817      *
 818      * var \u0042 = 44;
 819      */
 820     private void scanIdentifierOrKeyword() {
 821         // Record beginning of identifier.
 822         final int start = position;
 823         // Scan identifier.
 824         final int length = scanIdentifier();
 825         // Check to see if it is a keyword.
 826         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
 827         if (type == FUNCTION && pauseOnFunctionBody) {
 828             pauseOnNextLeftBrace = true;
 829         }
 830         // Add keyword or identifier token.
 831         add(type, start);
 832     }
 833 
 834     /**
 835      * Convert a string to a JavaScript string object.
 836      *
 837      * @param start  Position in source content.
 838      * @param length Length of token.
 839      * @return JavaScript string object.
 840      */
 841     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
 842         // Save the current position.
 843         final int savePosition = position;
 844         // Calculate the end position.
 845         final int end = start + length;
 846         // Reset to beginning of string.
 847         reset(start);
 848 
 849         // Buffer for recording characters.
 850         final StringBuilder sb = new StringBuilder(length);
 851 
 852         // Scan until end of string.
 853         while (position < end) {
 854             // If escape character.
 855             if (ch0 == '\\') {
 856                 skip(1);
 857 
 858                 final char next = ch0;
 859                 final int afterSlash = position;
 860 
 861                 skip(1);
 862 
 863                 // Special characters.
 864                 switch (next) {
 865                 case '0':
 866                 case '1':
 867                 case '2':
 868                 case '3':
 869                 case '4':
 870                 case '5':
 871                 case '6':
 872                 case '7': {
 873                     if (strict) {
 874                         // "\0" itself is allowed in strict mode. Only other 'real'
 875                         // octal escape sequences are not allowed (eg. "\02", "\31").
 876                         // See section 7.8.4 String literals production EscapeSequence
 877                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
 878                             error(Lexer.message("strict.no.octal"), STRING, position, limit);
 879                         }
 880                     }
 881                     reset(afterSlash);
 882                     // Octal sequence.
 883                     final int ch = octalSequence();
 884 
 885                     if (ch < 0) {
 886                         sb.append('\\');
 887                         sb.append('x');
 888                     } else {
 889                         sb.append((char)ch);
 890                     }
 891                     break;
 892                 }
 893                 case 'n':
 894                     sb.append('\n');
 895                     break;
 896                 case 't':
 897                     sb.append('\t');
 898                     break;
 899                 case 'b':
 900                     sb.append('\b');
 901                     break;
 902                 case 'f':
 903                     sb.append('\f');
 904                     break;
 905                 case 'r':
 906                     sb.append('\r');
 907                     break;
 908                 case '\'':
 909                     sb.append('\'');
 910                     break;
 911                 case '\"':
 912                     sb.append('\"');
 913                     break;
 914                 case '\\':
 915                     sb.append('\\');
 916                     break;
 917                 case '\r': // CR | CRLF
 918                     if (ch0 == '\n') {
 919                         skip(1);
 920                     }
 921                     // fall through
 922                 case '\n': // LF
 923                 case '\u2028': // LS
 924                 case '\u2029': // PS
 925                     // continue on the next line, slash-return continues string
 926                     // literal
 927                     break;
 928                 case 'x': {
 929                     // Hex sequence.
 930                     final int ch = hexSequence(2, STRING);
 931 
 932                     if (ch < 0) {
 933                         sb.append('\\');
 934                         sb.append('x');
 935                     } else {
 936                         sb.append((char)ch);
 937                     }
 938                 }
 939                     break;
 940                 case 'u': {
 941                     // Unicode sequence.
 942                     final int ch = hexSequence(4, STRING);
 943 
 944                     if (ch < 0) {
 945                         sb.append('\\');
 946                         sb.append('u');
 947                     } else {
 948                         sb.append((char)ch);
 949                     }
 950                 }
 951                     break;
 952                 case 'v':
 953                     sb.append('\u000B');
 954                     break;
 955                 // All other characters.
 956                 default:
 957                     sb.append(next);
 958                     break;
 959                 }
 960             } else if (ch0 == '\r') {
 961                 // Convert CR-LF or CR to LF line terminator.
 962                 sb.append('\n');
 963                 skip(ch1 == '\n' ? 2 : 1);
 964             } else {
 965                 // Add regular character.
 966                 sb.append(ch0);
 967                 skip(1);
 968             }
 969         }
 970 
 971         // Restore position.
 972         reset(savePosition);
 973 
 974         return sb.toString();
 975     }
 976 
 977     /**
 978      * Scan over a string literal.
 979      * @param add true if we are not just scanning but should actually modify the token stream
 980      */
 981     protected void scanString(final boolean add) {
 982         // Type of string.
 983         TokenType type = STRING;
 984         // Record starting quote.
 985         final char quote = ch0;
 986         // Skip over quote.
 987         skip(1);
 988 
 989         // Record beginning of string content.
 990         final State stringState = saveState();
 991 
 992         // Scan until close quote or end of line.
 993         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
 994             // Skip over escaped character.
 995             if (ch0 == '\\') {
 996                 type = ESCSTRING;
 997                 skip(1);
 998                 if (! isEscapeCharacter(ch0)) {
 999                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);
1000                 }
1001                 if (isEOL(ch0)) {
1002                     // Multiline string literal
1003                     skipEOL(false);
1004                     continue;
1005                 }
1006             }
1007             // Skip literal character.
1008             skip(1);
1009         }
1010 
1011         // If close quote.
1012         if (ch0 == quote) {
1013             // Skip close quote.
1014             skip(1);
1015         } else {
1016             error(Lexer.message("missing.close.quote"), STRING, position, limit);
1017         }
1018 
1019         // If not just scanning.
1020         if (add) {
1021             // Record end of string.
1022             stringState.setLimit(position - 1);
1023 
1024             if (scripting && !stringState.isEmpty()) {
1025                 switch (quote) {
1026                 case '`':
1027                     // Mark the beginning of an exec string.
1028                     add(EXECSTRING, stringState.position, stringState.limit);
1029                     // Frame edit string with left brace.
1030                     add(LBRACE, stringState.position, stringState.position);
1031                     // Process edit string.
1032                     editString(type, stringState);
1033                     // Frame edit string with right brace.
1034                     add(RBRACE, stringState.limit, stringState.limit);
1035                     break;
1036                 case '"':
1037                     // Only edit double quoted strings.
1038                     editString(type, stringState);
1039                     break;
1040                 case '\'':
1041                     // Add string token without editing.
1042                     add(type, stringState.position, stringState.limit);
1043                     break;
1044                 default:
1045                     break;
1046                 }
1047             } else {
1048                 /// Add string token without editing.
1049                 add(type, stringState.position, stringState.limit);
1050             }
1051         }
1052     }
1053 
1054     /**
1055      * Scan over a template string literal.
1056      */
1057     private void scanTemplate() {
1058         assert ch0 == '`';
1059         TokenType type = TEMPLATE;
1060 
1061         // Skip over quote and record beginning of string content.
1062         skip(1);
1063         State stringState = saveState();
1064 
1065         // Scan until close quote
1066         while (!atEOF()) {
1067             // Skip over escaped character.
1068             if (ch0 == '`') {
1069                 skip(1);
1070                 // Record end of string.
1071                 stringState.setLimit(position - 1);
1072                 add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit);
1073                 return;
1074             } else if (ch0 == '$' && ch1 == '{') {
1075                 skip(2);
1076                 stringState.setLimit(position - 2);
1077                 add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit);
1078 
1079                 // scan to RBRACE
1080                 final Lexer expressionLexer = new Lexer(this, saveState());
1081                 expressionLexer.templateExpressionOpenBraces = 1;
1082                 expressionLexer.lexify();
1083                 restoreState(expressionLexer.saveState());
1084 
1085                 // scan next middle or tail of the template literal
1086                 assert ch0 == '}';
1087                 type = TEMPLATE_MIDDLE;
1088 
1089                 // Skip over rbrace and record beginning of string content.
1090                 skip(1);
1091                 stringState = saveState();
1092 
1093                 continue;
1094             } else if (ch0 == '\\') {
1095                 skip(1);
1096                 // EscapeSequence
1097                 if (!isEscapeCharacter(ch0)) {
1098                     error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit);
1099                 }
1100                 if (isEOL(ch0)) {
1101                     // LineContinuation
1102                     skipEOL(false);
1103                     continue;
1104                 }
1105             }  else if (isEOL(ch0)) {
1106                 // LineTerminatorSequence
1107                 skipEOL(false);
1108                 continue;
1109             }
1110 
1111             // Skip literal character.
1112             skip(1);
1113         }
1114 
1115         error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
1116     }
1117 
1118     /**
1119      * Is the given character a valid escape char after "\" ?
1120      *
1121      * @param ch character to be checked
1122      * @return if the given character is valid after "\"
1123      */
1124     protected boolean isEscapeCharacter(final char ch) {
1125         return true;
1126     }
1127 
1128     /**
1129      * Convert string to number.
1130      *
1131      * @param valueString  String to convert.
1132      * @param radix        Numeric base.
1133      * @return Converted number.
1134      */
1135     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1136         try {
1137             return Integer.parseInt(valueString, radix);
1138         } catch (final NumberFormatException e) {
1139             if (radix == 10) {
1140                 return Double.valueOf(valueString);
1141             }
1142 
1143             double value = 0.0;
1144 
1145             for (int i = 0; i < valueString.length(); i++) {
1146                 final char ch = valueString.charAt(i);
1147                 // Preverified, should always be a valid digit.
1148                 final int digit = convertDigit(ch, radix);
1149                 value *= radix;
1150                 value += digit;
1151             }
1152 
1153             return value;
1154         }
1155     }
1156 
1157     /**
1158      * Scan a number.
1159      */
1160     protected void scanNumber() {
1161         // Record beginning of number.
1162         final int start = position;
1163         // Assume value is a decimal.
1164         TokenType type = DECIMAL;
1165 
1166         // First digit of number.
1167         int digit = convertDigit(ch0, 10);
1168 
1169         // If number begins with 0x.
1170         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1171             // Skip over 0xN.
1172             skip(3);
1173             // Skip over remaining digits.
1174             while (convertDigit(ch0, 16) != -1) {
1175                 skip(1);
1176             }
1177 
1178             type = HEXADECIMAL;
1179         } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) {
1180             // Skip over 0oN.
1181             skip(3);
1182             // Skip over remaining digits.
1183             while (convertDigit(ch0, 8) != -1) {
1184                 skip(1);
1185             }
1186 
1187             type = OCTAL;
1188         } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) {
1189             // Skip over 0bN.
1190             skip(3);
1191             // Skip over remaining digits.
1192             while (convertDigit(ch0, 2) != -1) {
1193                 skip(1);
1194             }
1195 
1196             type = BINARY_NUMBER;
1197         } else {
1198             // Check for possible octal constant.
1199             boolean octal = digit == 0;
1200             // Skip first digit if not leading '.'.
1201             if (digit != -1) {
1202                 skip(1);
1203             }
1204 
1205             // Skip remaining digits.
1206             while ((digit = convertDigit(ch0, 10)) != -1) {
1207                 // Check octal only digits.
1208                 octal = octal && digit < 8;
1209                 // Skip digit.
1210                 skip(1);
1211             }
1212 
1213             if (octal && position - start > 1) {
1214                 type = OCTAL_LEGACY;
1215             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1216                 // Must be a double.
1217                 if (ch0 == '.') {
1218                     // Skip period.
1219                     skip(1);
1220                     // Skip mantissa.
1221                     while (convertDigit(ch0, 10) != -1) {
1222                         skip(1);
1223                     }
1224                 }
1225 
1226                 // Detect exponent.
1227                 if (ch0 == 'E' || ch0 == 'e') {
1228                     // Skip E.
1229                     skip(1);
1230                     // Detect and skip exponent sign.
1231                     if (ch0 == '+' || ch0 == '-') {
1232                         skip(1);
1233                     }
1234                     // Skip exponent.
1235                     while (convertDigit(ch0, 10) != -1) {
1236                         skip(1);
1237                     }
1238                 }
1239 
1240                 type = FLOATING;
1241             }
1242         }
1243 
1244         if (Character.isJavaIdentifierStart(ch0)) {
1245             error(Lexer.message("missing.space.after.number"), type, position, 1);
1246         }
1247 
1248         // Add number token.
1249         add(type, start);
1250     }
1251 
1252     /**
1253      * Convert a regex token to a token object.
1254      *
1255      * @param start  Position in source content.
1256      * @param length Length of regex token.
1257      * @return Regex token object.
1258      */
1259     XMLToken valueOfXML(final int start, final int length) {
1260         return new XMLToken(source.getString(start, length));
1261     }
1262 
1263     /**
1264      * Scan over a XML token.
1265      *
1266      * @return TRUE if is an XML literal.
1267      */
1268     private boolean scanXMLLiteral() {
1269         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1270         if (XML_LITERALS) {
1271             // Record beginning of xml expression.
1272             final int start = position;
1273 
1274             int openCount = 0;
1275 
1276             do {
1277                 if (ch0 == '<') {
1278                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1279                         skip(3);
1280                         openCount--;
1281                     } else if (Character.isJavaIdentifierStart(ch1)) {
1282                         skip(2);
1283                         openCount++;
1284                     } else if (ch1 == '?') {
1285                         skip(2);
1286                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1287                         skip(4);
1288                     } else {
1289                         reset(start);
1290                         return false;
1291                     }
1292 
1293                     while (!atEOF() && ch0 != '>') {
1294                         if (ch0 == '/' && ch1 == '>') {
1295                             openCount--;
1296                             skip(1);
1297                             break;
1298                         } else if (ch0 == '\"' || ch0 == '\'') {
1299                             scanString(false);
1300                         } else {
1301                             skip(1);
1302                         }
1303                     }
1304 
1305                     if (ch0 != '>') {
1306                         reset(start);
1307                         return false;
1308                     }
1309 
1310                     skip(1);
1311                 } else if (atEOF()) {
1312                     reset(start);
1313                     return false;
1314                 } else {
1315                     skip(1);
1316                 }
1317             } while (openCount > 0);
1318 
1319             add(XML, start);
1320             return true;
1321         }
1322 
1323         return false;
1324     }
1325 
1326     /**
1327      * Scan over identifier characters.
1328      *
1329      * @return Length of identifier or zero if none found.
1330      */
1331     private int scanIdentifier() {
1332         final int start = position;
1333 
1334         // Make sure first character is valid start character.
1335         if (ch0 == '\\' && ch1 == 'u') {
1336             skip(2);
1337             final int ch = hexSequence(4, TokenType.IDENT);
1338 
1339             if (!Character.isJavaIdentifierStart(ch)) {
1340                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1341             }
1342         } else if (!Character.isJavaIdentifierStart(ch0)) {
1343             // Not an identifier.
1344             return 0;
1345         }
1346 
1347         // Make sure remaining characters are valid part characters.
1348         while (!atEOF()) {
1349             if (ch0 == '\\' && ch1 == 'u') {
1350                 skip(2);
1351                 final int ch = hexSequence(4, TokenType.IDENT);
1352 
1353                 if (!Character.isJavaIdentifierPart(ch)) {
1354                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1355                 }
1356             } else if (Character.isJavaIdentifierPart(ch0)) {
1357                 skip(1);
1358             } else {
1359                 break;
1360             }
1361         }
1362 
1363         // Length of identifier sequence.
1364         return position - start;
1365     }
1366 
1367     /**
1368      * Compare two identifiers (in content) for equality.
1369      *
1370      * @param aStart  Start of first identifier.
1371      * @param aLength Length of first identifier.
1372      * @param bStart  Start of second identifier.
1373      * @param bLength Length of second identifier.
1374      * @return True if equal.
1375      */
1376     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1377         if (aLength == bLength) {
1378             for (int i = 0; i < aLength; i++) {
1379                 if (content[aStart + i] != content[bStart + i]) {
1380                     return false;
1381                 }
1382             }
1383 
1384             return true;
1385         }
1386 
1387         return false;
1388     }
1389 
1390     /**
1391      * Detect if a line starts with a marker identifier.
1392      *
1393      * @param identStart  Start of identifier.
1394      * @param identLength Length of identifier.
1395      * @return True if detected.
1396      */
1397     private boolean hasHereMarker(final int identStart, final int identLength) {
1398         // Skip any whitespace.
1399         skipWhitespace(false);
1400 
1401         return identifierEqual(identStart, identLength, position, scanIdentifier());
1402     }
1403 
1404     /**
1405      * Lexer to service edit strings.
1406      */
1407     private static class EditStringLexer extends Lexer {
1408         /** Type of string literals to emit. */
1409         final TokenType stringType;
1410 
1411         /*
1412          * Constructor.
1413          */
1414 
1415         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1416             super(lexer, stringState);
1417 
1418             this.stringType = stringType;
1419         }
1420 
1421         /**
1422          * Lexify the contents of the string.
1423          */
1424         @Override
1425         public void lexify() {
1426             // Record start of string position.
1427             int stringStart = position;
1428             // Indicate that the priming first string has not been emitted.
1429             boolean primed = false;
1430 
1431             while (true) {
1432                 // Detect end of content.
1433                 if (atEOF()) {
1434                     break;
1435                 }
1436 
1437                 // Honour escapes (should be well formed.)
1438                 if (ch0 == '\\' && stringType == ESCSTRING) {
1439                     skip(2);
1440 
1441                     continue;
1442                 }
1443 
1444                 // If start of expression.
1445                 if (ch0 == '$' && ch1 == '{') {
1446                     if (!primed || stringStart != position) {
1447                         if (primed) {
1448                             add(ADD, stringStart, stringStart + 1);
1449                         }
1450 
1451                         add(stringType, stringStart, position);
1452                         primed = true;
1453                     }
1454 
1455                     // Skip ${
1456                     skip(2);
1457 
1458                     // Save expression state.
1459                     final State expressionState = saveState();
1460 
1461                     // Start with one open brace.
1462                     int braceCount = 1;
1463 
1464                     // Scan for the rest of the string.
1465                     while (!atEOF()) {
1466                         // If closing brace.
1467                         if (ch0 == '}') {
1468                             // Break only only if matching brace.
1469                             if (--braceCount == 0) {
1470                                 break;
1471                             }
1472                         } else if (ch0 == '{') {
1473                             // Bump up the brace count.
1474                             braceCount++;
1475                         }
1476 
1477                         // Skip to next character.
1478                         skip(1);
1479                     }
1480 
1481                     // If braces don't match then report an error.
1482                     if (braceCount != 0) {
1483                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1484                     }
1485 
1486                     // Mark end of expression.
1487                     expressionState.setLimit(position);
1488                     // Skip closing brace.
1489                     skip(1);
1490 
1491                     // Start next string.
1492                     stringStart = position;
1493 
1494                     // Concatenate expression.
1495                     add(ADD, expressionState.position, expressionState.position + 1);
1496                     add(LPAREN, expressionState.position, expressionState.position + 1);
1497 
1498                     // Scan expression.
1499                     final Lexer lexer = new Lexer(this, expressionState);
1500                     lexer.lexify();
1501 
1502                     // Close out expression parenthesis.
1503                     add(RPAREN, position - 1, position);
1504 
1505                     continue;
1506                 }
1507 
1508                 // Next character in string.
1509                 skip(1);
1510             }
1511 
1512             // If there is any unemitted string portion.
1513             if (stringStart != limit) {
1514                 // Concatenate remaining string.
1515                 if (primed) {
1516                     add(ADD, stringStart, 1);
1517                 }
1518 
1519                 add(stringType, stringStart, limit);
1520             }
1521         }
1522 
1523     }
1524 
1525     /**
1526      * Edit string for nested expressions.
1527      *
1528      * @param stringType  Type of string literals to emit.
1529      * @param stringState State of lexer at start of string.
1530      */
1531     private void editString(final TokenType stringType, final State stringState) {
1532         // Use special lexer to scan string.
1533         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1534         lexer.lexify();
1535 
1536         // Need to keep lexer informed.
1537         last = stringType;
1538     }
1539 
1540     /**
1541      * Scan over a here string.
1542      *
1543      * @return TRUE if is a here string.
1544      */
1545     private boolean scanHereString(final LineInfoReceiver lir, final State oldState) {
1546         assert ch0 == '<' && ch1 == '<';
1547         if (scripting) {
1548             // Record beginning of here string.
1549             final State saved = saveState();
1550 
1551             // << or <<<
1552             final boolean excludeLastEOL = ch2 != '<';
1553 
1554             if (excludeLastEOL) {
1555                 skip(2);
1556             } else {
1557                 skip(3);
1558             }
1559 
1560             // Scan identifier. It might be quoted, indicating that no string editing should take place.
1561             final char quoteChar = ch0;
1562             final boolean noStringEditing = quoteChar == '"' || quoteChar == '\'';
1563             if (noStringEditing) {
1564                 skip(1);
1565             }
1566             final int identStart = position;
1567             final int identLength = scanIdentifier();
1568             if (noStringEditing) {
1569                 if (ch0 != quoteChar) {
1570                     error(Lexer.message("here.non.matching.delimiter"), last, position, position);
1571                     restoreState(saved);
1572                     return false;
1573                 }
1574                 skip(1);
1575             }
1576 
1577             // Check for identifier.
1578             if (identLength == 0) {
1579                 // Treat as shift.
1580                 restoreState(saved);
1581 
1582                 return false;
1583             }
1584 
1585             // Record rest of line.
1586             final State restState = saveState();
1587             // keep line number updated
1588             int lastLine = line;
1589 
1590             skipLine(false);
1591             lastLine++;
1592             int lastLinePosition = position;
1593             restState.setLimit(position);
1594 
1595             if (oldState.position > position) {
1596                 restoreState(oldState);
1597                 skipLine(false);
1598             }
1599 
1600             // Record beginning of string.
1601             final State stringState = saveState();
1602             int stringEnd = position;
1603 
1604             // Hunt down marker.
1605             while (!atEOF()) {
1606                 // Skip any whitespace.
1607                 skipWhitespace(false);
1608 
1609                 if (hasHereMarker(identStart, identLength)) {
1610                     break;
1611                 }
1612 
1613                 skipLine(false);
1614                 lastLine++;
1615                 lastLinePosition = position;
1616                 stringEnd = position;
1617             }
1618 
1619             // notify last line information
1620             lir.lineInfo(lastLine, lastLinePosition);
1621 
1622             // Record end of string.
1623             stringState.setLimit(stringEnd);
1624 
1625             // If marker is missing.
1626             if (stringState.isEmpty() || atEOF()) {
1627                 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1628                 restoreState(saved);
1629 
1630                 return false;
1631             }
1632 
1633             // Remove last end of line if specified.
1634             if (excludeLastEOL) {
1635                 // Handles \n.
1636                 if (content[stringEnd - 1] == '\n') {
1637                     stringEnd--;
1638                 }
1639 
1640                 // Handles \r and \r\n.
1641                 if (content[stringEnd - 1] == '\r') {
1642                     stringEnd--;
1643                 }
1644 
1645                 // Update end of string.
1646                 stringState.setLimit(stringEnd);
1647             }
1648 
1649             // Edit string if appropriate.
1650             if (!noStringEditing && !stringState.isEmpty()) {
1651                 editString(STRING, stringState);
1652             } else {
1653                 // Add here string.
1654                 add(STRING, stringState.position, stringState.limit);
1655             }
1656 
1657             // Scan rest of original line.
1658             final Lexer restLexer = new Lexer(this, restState);
1659 
1660             restLexer.lexify();
1661 
1662             return true;
1663         }
1664 
1665         return false;
1666     }
1667 
1668     /**
1669      * Breaks source content down into lex units, adding tokens to the token
1670      * stream. The routine scans until the stream buffer is full. Can be called
1671      * repeatedly until EOF is detected.
1672      */
1673     public void lexify() {
1674         while (!stream.isFull() || nested) {
1675             // Skip over whitespace.
1676             skipWhitespace(true);
1677 
1678             // Detect end of file.
1679             if (atEOF()) {
1680                 if (!nested) {
1681                     // Add an EOF token at the end.
1682                     add(EOF, position);
1683                 }
1684 
1685                 break;
1686             }
1687 
1688             // Check for comments. Note that we don't scan for regexp and other literals here as
1689             // we may not have enough context to distinguish them from similar looking operators.
1690             // Instead we break on ambiguous operators below and let the parser decide.
1691             if (ch0 == '/' && skipComments()) {
1692                 continue;
1693             }
1694 
1695             if (scripting && ch0 == '#' && skipComments()) {
1696                 continue;
1697             }
1698 
1699             // TokenType for lookup of delimiter or operator.
1700             TokenType type;
1701 
1702             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1703                 // '.' followed by digit.
1704                 // Scan and add a number.
1705                 scanNumber();
1706             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1707                 if (templateExpressionOpenBraces > 0) {
1708                     if (type == LBRACE) {
1709                         templateExpressionOpenBraces++;
1710                     } else if (type == RBRACE) {
1711                         if (--templateExpressionOpenBraces == 0) {
1712                             break;
1713                         }
1714                     }
1715                 }
1716 
1717                 // Get the number of characters in the token.
1718                 final int typeLength = type.getLength();
1719                 // Skip that many characters.
1720                 skip(typeLength);
1721                 // Add operator token.
1722                 add(type, position - typeLength);
1723                 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1724                 // We break to let the parser decide what it is.
1725                 if (canStartLiteral(type)) {
1726                     break;
1727                 } else if (type == LBRACE && pauseOnNextLeftBrace) {
1728                     pauseOnNextLeftBrace = false;
1729                     break;
1730                 }
1731             } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1732                 // Scan and add identifier or keyword.
1733                 scanIdentifierOrKeyword();
1734             } else if (isStringDelimiter(ch0)) {
1735                 // Scan and add a string.
1736                 scanString(true);
1737             } else if (Character.isDigit(ch0)) {
1738                 // Scan and add a number.
1739                 scanNumber();
1740             } else if (isTemplateDelimiter(ch0) && es6) {
1741                 // Scan and add template in ES6 mode.
1742                 scanTemplate();
1743             } else if (isTemplateDelimiter(ch0) && scripting) {
1744                 // Scan and add an exec string ('`') in scripting mode.
1745                 scanString(true);
1746             } else {
1747                 // Don't recognize this character.
1748                 skip(1);
1749                 add(ERROR, position - 1);
1750             }
1751         }
1752     }
1753 
1754     /**
1755      * Return value of token given its token descriptor.
1756      *
1757      * @param token  Token descriptor.
1758      * @return JavaScript value.
1759      */
1760     Object getValueOf(final long token, final boolean strict) {
1761         final int start = Token.descPosition(token);
1762         final int len   = Token.descLength(token);
1763 
1764         switch (Token.descType(token)) {
1765         case DECIMAL:
1766             return Lexer.valueOf(source.getString(start, len), 10); // number
1767         case HEXADECIMAL:
1768             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1769         case OCTAL_LEGACY:
1770             return Lexer.valueOf(source.getString(start, len), 8); // number
1771         case OCTAL:
1772             return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number
1773         case BINARY_NUMBER:
1774             return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number
1775         case FLOATING:
1776             final String str   = source.getString(start, len);
1777             final double value = Double.valueOf(str);
1778             if (str.indexOf('.') != -1) {
1779                 return value; //number
1780             }
1781             //anything without an explicit decimal point is still subject to a
1782             //"representable as int or long" check. Then the programmer does not
1783             //explicitly code something as a double. For example new Color(int, int, int)
1784             //and new Color(float, float, float) will get ambiguous for cases like
1785             //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1786             //yet we don't want e.g. 1e6 to be a double unnecessarily
1787             if (JSType.isStrictlyRepresentableAsInt(value)) {
1788                 return (int)value;
1789             }
1790             return value;
1791         case STRING:
1792             return source.getString(start, len); // String
1793         case ESCSTRING:
1794             return valueOfString(start, len, strict); // String
1795         case IDENT:
1796             return valueOfIdent(start, len); // String
1797         case REGEX:
1798             return valueOfPattern(start, len); // RegexToken::LexerToken
1799         case TEMPLATE:
1800         case TEMPLATE_HEAD:
1801         case TEMPLATE_MIDDLE:
1802         case TEMPLATE_TAIL:
1803             return valueOfString(start, len, true); // String
1804         case XML:
1805             return valueOfXML(start, len); // XMLToken::LexerToken
1806         case DIRECTIVE_COMMENT:
1807             return source.getString(start, len);
1808         default:
1809             break;
1810         }
1811 
1812         return null;
1813     }
1814 
1815     /**
1816      * Get the raw string value of a template literal string part.
1817      *
1818      * @param token template string token
1819      * @return raw string
1820      */
1821     public String valueOfRawString(final long token) {
1822         final int start  = Token.descPosition(token);
1823         final int length = Token.descLength(token);
1824 
1825         // Save the current position.
1826         final int savePosition = position;
1827         // Calculate the end position.
1828         final int end = start + length;
1829         // Reset to beginning of string.
1830         reset(start);
1831 
1832         // Buffer for recording characters.
1833         final StringBuilder sb = new StringBuilder(length);
1834 
1835         // Scan until end of string.
1836         while (position < end) {
1837             if (ch0 == '\r') {
1838                 // Convert CR-LF or CR to LF line terminator.
1839                 sb.append('\n');
1840                 skip(ch1 == '\n' ? 2 : 1);
1841             } else {
1842                 // Add regular character.
1843                 sb.append(ch0);
1844                 skip(1);
1845             }
1846         }
1847 
1848         // Restore position.
1849         reset(savePosition);
1850 
1851         return sb.toString();
1852     }
1853 
1854     /**
1855      * Get the correctly localized error message for a given message id format arguments
1856      * @param msgId message id
1857      * @param args  format arguments
1858      * @return message
1859      */
1860     protected static String message(final String msgId, final String... args) {
1861         return ECMAErrors.getMessage("lexer.error." + msgId, args);
1862     }
1863 
1864     /**
1865      * Generate a runtime exception
1866      *
1867      * @param message       error message
1868      * @param type          token type
1869      * @param start         start position of lexed error
1870      * @param length        length of lexed error
1871      * @throws ParserException  unconditionally
1872      */
1873     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1874         final long token     = Token.toDesc(type, start, length);
1875         final int  pos       = Token.descPosition(token);
1876         final int  lineNum   = source.getLine(pos);
1877         final int  columnNum = source.getColumn(pos);
1878         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1879         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1880     }
1881 
1882     /**
1883      * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1884      * This is the abstract superclass
1885      */
1886     public static abstract class LexerToken implements Serializable {
1887         private static final long serialVersionUID = 1L;
1888 
1889         private final String expression;
1890 
1891         /**
1892          * Constructor
1893          * @param expression token expression
1894          */
1895         protected LexerToken(final String expression) {
1896             this.expression = expression;
1897         }
1898 
1899         /**
1900          * Get the expression
1901          * @return expression
1902          */
1903         public String getExpression() {
1904             return expression;
1905         }
1906     }
1907 
1908     /**
1909      * Temporary container for regular expressions.
1910      */
1911     public static class RegexToken extends LexerToken {
1912         private static final long serialVersionUID = 1L;
1913 
1914         /** Options. */
1915         private final String options;
1916 
1917         /**
1918          * Constructor.
1919          *
1920          * @param expression  regexp expression
1921          * @param options     regexp options
1922          */
1923         public RegexToken(final String expression, final String options) {
1924             super(expression);
1925             this.options = options;
1926         }
1927 
1928         /**
1929          * Get regexp options
1930          * @return options
1931          */
1932         public String getOptions() {
1933             return options;
1934         }
1935 
1936         @Override
1937         public String toString() {
1938             return '/' + getExpression() + '/' + options;
1939         }
1940     }
1941 
1942     /**
1943      * Temporary container for XML expression.
1944      */
1945     public static class XMLToken extends LexerToken {
1946         private static final long serialVersionUID = 1L;
1947 
1948         /**
1949          * Constructor.
1950          *
1951          * @param expression  XML expression
1952          */
1953         public XMLToken(final String expression) {
1954             super(expression);
1955         }
1956     }
1957 }