Old src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Lexer.java

   1 /*
   2  * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.ADD;
  29 import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER;
  30 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
  31 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
  32 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
  33 import static jdk.nashorn.internal.parser.TokenType.EOF;
  34 import static jdk.nashorn.internal.parser.TokenType.EOL;
  35 import static jdk.nashorn.internal.parser.TokenType.ERROR;
  36 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  37 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
  38 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
  39 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
  40 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
  41 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
  42 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
  43 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
  44 import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
  45 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  46 import static jdk.nashorn.internal.parser.TokenType.REGEX;
  47 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
  48 import static jdk.nashorn.internal.parser.TokenType.STRING;
  49 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
  50 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
  51 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
  52 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
  53 import static jdk.nashorn.internal.parser.TokenType.XML;
  54 
  55 import java.io.Serializable;
  56 
  57 import jdk.nashorn.internal.runtime.ECMAErrors;
  58 import jdk.nashorn.internal.runtime.ErrorManager;
  59 import jdk.nashorn.internal.runtime.JSErrorType;
  60 import jdk.nashorn.internal.runtime.JSType;
  61 import jdk.nashorn.internal.runtime.ParserException;
  62 import jdk.nashorn.internal.runtime.Source;
  63 import jdk.nashorn.internal.runtime.options.Options;
  64 
  65 /**
  66  * Responsible for converting source content into a stream of tokens.
  67  *
  68  */
  69 @SuppressWarnings("fallthrough")
  70 public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} widened to {@code long}. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} widened to {@code long}. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /**
     * Value of the boolean option "nashorn.lexer.xmlliterals". When set, a token starting
     * with {@code <} may begin a literal even outside scripting mode
     * (see {@link #canStartLiteral(TokenType)}).
     */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens. */
    private final TokenStream stream;

    /**
     * True if scripting extensions are enabled. In this class it gates {@code #} shell style
     * comments and literals that start with {@code <}.
     */
    private final boolean scripting;

    /** True if parsing in ECMAScript 6 mode. */
    private final boolean es6;

    /** True if a nested scan. (scan to completion, no EOF.) */
    private final boolean nested;

    /**
     * Value recorded from the most recent EOL token that has not yet been written to the
     * stream, or -1 when no EOL is pending. {@code skipEOL} passes the current line number
     * as that value.
     */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** Constructor supplied flag: when set, seeing a {@code function} keyword arms {@link #pauseOnNextLeftBrace}. */
    private final boolean pauseOnFunctionBody;
    /** Set once a {@code function} keyword has been scanned while {@link #pauseOnFunctionBody} is enabled. */
    private boolean pauseOnNextLeftBrace;

    // NOTE(review): presumably counts unclosed '{' inside a template literal expression;
    // the code that uses it is outside this part of the file - confirm.
    private int templateExpressionOpenBraces;
 104 
    /** Horizontal blanks shared by the whitespace tables below. */
    private static final String SPACETAB = " \t";  // ASCII space and tab
    /** Line feed and carriage return, in that order. */
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    /** Characters treated as a JavaScript end of line; searched by {@link #isJSEOL(char)}. */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /**
     * Characters treated as JavaScript whitespace, end of line characters included;
     * searched by {@link #isJSWhitespace(char)}.
     */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // line tabulation (vertical tab)
        "\u000c" + // ff (ctrl-l)
        "\u00a0" + // Latin-1 no-break space
        "\u1680" + // Ogham space mark
        "\u180e" + // Mongolian vowel separator
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;

    /**
     * The same set of characters as {@link #JAVASCRIPT_WHITESPACE}, but spelled as text:
     * each entry is a backslash, the letter u and four hex digits rather than the character
     * itself. Returned unchanged by {@link #getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 no-break space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // Mongolian vowel separator
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
 166 
 167     static String unicodeEscape(final char ch) {
 168         final StringBuilder sb = new StringBuilder();
 169 
 170         sb.append("\\u");
 171 
 172         final String hex = Integer.toHexString(ch);
 173         for (int i = hex.length(); i < 4; i++) {
 174             sb.append('0');
 175         }
 176         sb.append(hex);
 177 
 178         return sb.toString();
 179     }
 180 
 181     /**
 182      * Constructor
 183      *
 184      * @param source    the source
 185      * @param stream    the token stream to lex
 186      */
 187     public Lexer(final Source source, final TokenStream stream) {
 188         this(source, stream, false, false);
 189     }
 190 
    /**
     * Constructor for lexing a whole source. Delegates to the full constructor with a start
     * position of 0, a length of {@code source.getLength()} and {@code pauseOnFunctionBody}
     * switched off.
     *
     * @param source    the source
     * @param stream    the token stream to lex
     * @param scripting are we in scripting mode
     * @param es6       are we in ECMAScript 6 mode
     */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) {
        this(source, 0, source.getLength(), stream, scripting, es6, false);
    }
 202 
 203     /**
 204      * Constructor
 205      *
 206      * @param source    the source
 207      * @param start     start position in source from which to start lexing
 208      * @param len       length of source segment to lex
 209      * @param stream    token stream to lex
 210      * @param scripting are we in scripting mode
 211      * @param es6       are we in ECMAScript 6 mode
 212      * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
 213      * function body. This is used with the feature where the parser is skipping nested function bodies to
 214      * avoid reading ahead unnecessarily when we skip the function bodies.
 215      */
 216 
 217     public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) {
 218         super(source.getContent(), 1, start, len);
 219         this.source      = source;
 220         this.stream      = stream;
 221         this.scripting   = scripting;
 222         this.es6         = es6;
 223         this.nested      = false;
 224         this.pendingLine = 1;
 225         this.last        = EOL;
 226 
 227         this.pauseOnFunctionBody = pauseOnFunctionBody;
 228     }
 229 
 230     private Lexer(final Lexer lexer, final State state) {
 231         super(lexer, state);
 232 
 233         source = lexer.source;
 234         stream = lexer.stream;
 235         scripting = lexer.scripting;
 236         es6 = lexer.es6;
 237         nested = true;
 238 
 239         pendingLine = state.pendingLine;
 240         linePosition = state.linePosition;
 241         last = EOL;
 242         pauseOnFunctionBody = false;
 243     }
 244 
 245     static class State extends Scanner.State {
 246         /** Pending new line number and position. */
 247         public final int pendingLine;
 248 
 249         /** Position of last EOL + 1. */
 250         public final int linePosition;
 251 
 252         /** Type of last token added. */
 253         public final TokenType last;
 254 
 255         /*
 256          * Constructor.
 257          */
 258 
 259         State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
 260             super(position, limit, line);
 261 
 262             this.pendingLine = pendingLine;
 263             this.linePosition = linePosition;
 264             this.last = last;
 265         }
 266     }
 267 
 268     /**
 269      * Save the state of the scan.
 270      *
 271      * @return Captured state.
 272      */
 273     @Override
 274     State saveState() {
 275         return new State(position, limit, line, pendingLine, linePosition, last);
 276     }
 277 
 278     /**
 279      * Restore the state of the scan.
 280      *
 281      * @param state
 282      *            Captured state.
 283      */
 284     void restoreState(final State state) {
 285         super.restoreState(state);
 286 
 287         pendingLine = state.pendingLine;
 288         linePosition = state.linePosition;
 289         last = state.last;
 290     }
 291 
 292     /**
 293      * Add a new token to the stream.
 294      *
 295      * @param type
 296      *            Token type.
 297      * @param start
 298      *            Start position.
 299      * @param end
 300      *            End position.
 301      */
 302     protected void add(final TokenType type, final int start, final int end) {
 303         // Record last token.
 304         last = type;
 305 
 306         // Only emit the last EOL in a cluster.
 307         if (type == EOL) {
 308             pendingLine = end;
 309             linePosition = start;
 310         } else {
 311             // Write any pending EOL to stream.
 312             if (pendingLine != -1) {
 313                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
 314                 pendingLine = -1;
 315             }
 316 
 317             // Write token to stream.
 318             stream.put(Token.toDesc(type, start, end - start));
 319         }
 320     }
 321 
 322     /**
 323      * Add a new token to the stream.
 324      *
 325      * @param type
 326      *            Token type.
 327      * @param start
 328      *            Start position.
 329      */
 330     protected void add(final TokenType type, final int start) {
 331         add(type, start, position);
 332     }
 333 
    /**
     * Return the String of valid whitespace characters for regular
     * expressions in JavaScript. The characters are spelled out as escape
     * text (backslash, u, four hex digits) rather than as the characters
     * themselves.
     * @return regexp whitespace string
     */
    public static String getWhitespaceRegExp() {
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
 342 
 343     /**
 344      * Skip end of line.
 345      *
 346      * @param addEOL true if EOL token should be recorded.
 347      */
 348     private void skipEOL(final boolean addEOL) {
 349 
 350         if (ch0 == '\r') { // detect \r\n pattern
 351             skip(1);
 352             if (ch0 == '\n') {
 353                 skip(1);
 354             }
 355         } else { // all other space, ch0 is guaranteed to be EOL or \0
 356             skip(1);
 357         }
 358 
 359         // bump up line count
 360         line++;
 361 
 362         if (addEOL) {
 363             // Add an EOL token.
 364             add(EOL, position, line);
 365         }
 366     }
 367 
 368     /**
 369      * Skip over rest of line including end of line.
 370      *
 371      * @param addEOL true if EOL token should be recorded.
 372      */
 373     private void skipLine(final boolean addEOL) {
 374         // Ignore characters.
 375         while (!isEOL(ch0) && !atEOF()) {
 376             skip(1);
 377         }
 378         // Skip over end of line.
 379         skipEOL(addEOL);
 380     }
 381 
 382     /**
 383      * Test whether a char is valid JavaScript whitespace
 384      * @param ch a char
 385      * @return true if valid JavaScript whitespace
 386      */
 387     public static boolean isJSWhitespace(final char ch) {
 388         return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
 389     }
 390 
 391     /**
 392      * Test whether a char is valid JavaScript end of line
 393      * @param ch a char
 394      * @return true if valid JavaScript end of line
 395      */
 396     public static boolean isJSEOL(final char ch) {
 397         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
 398     }
 399 
 400     /**
 401      * Test if char is a string delimiter, e.g. '\' or '"'.
 402      * @param ch a char
 403      * @return true if string delimiter
 404      */
 405     protected boolean isStringDelimiter(final char ch) {
 406         return ch == '\'' || ch == '"';
 407     }
 408 
 409     /**
 410      * Test if char is a template literal delimiter ('`').
 411      */
 412     private static boolean isTemplateDelimiter(char ch) {
 413         return ch == '`';
 414     }
 415 
 416     /**
 417      * Test whether a char is valid JavaScript whitespace
 418      * @param ch a char
 419      * @return true if valid JavaScript whitespace
 420      */
 421     protected boolean isWhitespace(final char ch) {
 422         return Lexer.isJSWhitespace(ch);
 423     }
 424 
 425     /**
 426      * Test whether a char is valid JavaScript end of line
 427      * @param ch a char
 428      * @return true if valid JavaScript end of line
 429      */
 430     protected boolean isEOL(final char ch) {
 431         return Lexer.isJSEOL(ch);
 432     }
 433 
 434     /**
 435      * Skip over whitespace and detect end of line, adding EOL tokens if
 436      * encountered.
 437      *
 438      * @param addEOL true if EOL tokens should be recorded.
 439      */
 440     private void skipWhitespace(final boolean addEOL) {
 441         while (isWhitespace(ch0)) {
 442             if (isEOL(ch0)) {
 443                 skipEOL(addEOL);
 444             } else {
 445                 skip(1);
 446             }
 447         }
 448     }
 449 
 450     /**
 451      * Skip over comments.
 452      *
 453      * @return True if a comment.
 454      */
 455     protected boolean skipComments() {
 456         // Save the current position.
 457         final int start = position;
 458 
 459         if (ch0 == '/') {
 460             // Is it a // comment.
 461             if (ch1 == '/') {
 462                 // Skip over //.
 463                 skip(2);
 464 
 465                 boolean directiveComment = false;
 466                 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
 467                     directiveComment = true;
 468                 }
 469 
 470                 // Scan for EOL.
 471                 while (!atEOF() && !isEOL(ch0)) {
 472                     skip(1);
 473                 }
 474                 // Did detect a comment.
 475                 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
 476                 return true;
 477             } else if (ch1 == '*') {
 478                 // Skip over /*.
 479                 skip(2);
 480                 // Scan for */.
 481                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
 482                     // If end of line handle else skip character.
 483                     if (isEOL(ch0)) {
 484                         skipEOL(true);
 485                     } else {
 486                         skip(1);
 487                     }
 488                 }
 489 
 490                 if (atEOF()) {
 491                     // TODO - Report closing */ missing in parser.
 492                     add(ERROR, start);
 493                 } else {
 494                     // Skip */.
 495                     skip(2);
 496                 }
 497 
 498                 // Did detect a comment.
 499                 add(COMMENT, start);
 500                 return true;
 501             }
 502         } else if (ch0 == '#') {
 503             assert scripting;
 504             // shell style comment
 505             // Skip over #.
 506             skip(1);
 507             // Scan for EOL.
 508             while (!atEOF() && !isEOL(ch0)) {
 509                 skip(1);
 510             }
 511             // Did detect a comment.
 512             add(COMMENT, start);
 513             return true;
 514         }
 515 
 516         // Not a comment.
 517         return false;
 518     }
 519 
 520     /**
 521      * Convert a regex token to a token object.
 522      *
 523      * @param start  Position in source content.
 524      * @param length Length of regex token.
 525      * @return Regex token object.
 526      */
 527     public RegexToken valueOfPattern(final int start, final int length) {
 528         // Save the current position.
 529         final int savePosition = position;
 530         // Reset to beginning of content.
 531         reset(start);
 532         // Buffer for recording characters.
 533         final StringBuilder sb = new StringBuilder(length);
 534 
 535         // Skip /.
 536         skip(1);
 537         boolean inBrackets = false;
 538         // Scan for closing /, stopping at end of line.
 539         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
 540             // Skip over escaped character.
 541             if (ch0 == '\\') {
 542                 sb.append(ch0);
 543                 sb.append(ch1);
 544                 skip(2);
 545             } else {
 546                 if (ch0 == '[') {
 547                     inBrackets = true;
 548                 } else if (ch0 == ']') {
 549                     inBrackets = false;
 550                 }
 551 
 552                 // Skip literal character.
 553                 sb.append(ch0);
 554                 skip(1);
 555             }
 556         }
 557 
 558         // Get pattern as string.
 559         final String regex = sb.toString();
 560 
 561         // Skip /.
 562         skip(1);
 563 
 564         // Options as string.
 565         final String options = source.getString(position, scanIdentifier());
 566 
 567         reset(savePosition);
 568 
 569         // Compile the pattern.
 570         return new RegexToken(regex, options);
 571     }
 572 
 573     /**
 574      * Return true if the given token can be the beginning of a literal.
 575      *
 576      * @param token a token
 577      * @return true if token can start a literal.
 578      */
 579     public boolean canStartLiteral(final TokenType token) {
 580         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
 581     }
 582 
 583     /**
 584      * interface to receive line information for multi-line literals.
 585      */
 586     protected interface LineInfoReceiver {
 587         /**
 588          * Receives line information
 589          * @param line last line number
 590          * @param linePosition position of last line
 591          */
 592         public void lineInfo(int line, int linePosition);
 593     }
 594 
 595     /**
 596      * Check whether the given token represents the beginning of a literal. If so scan
 597      * the literal and return <tt>true</tt>, otherwise return false.
 598      *
 599      * @param token the token.
 600      * @param startTokenType the token type.
 601      * @param lir LineInfoReceiver that receives line info for multi-line string literals.
 602      * @return True if a literal beginning with startToken was found and scanned.
 603      */
 604     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
 605         // Check if it can be a literal.
 606         if (!canStartLiteral(startTokenType)) {
 607             return false;
 608         }
 609         // We break on ambiguous tokens so if we already moved on it can't be a literal.
 610         if (stream.get(stream.last()) != token) {
 611             return false;
 612         }
 613         // Rewind to token start position
 614         reset(Token.descPosition(token));
 615 
 616         if (ch0 == '/') {
 617             return scanRegEx();
 618         } else if (ch0 == '<') {
 619             if (ch1 == '<') {
 620                 return scanHereString(lir);
 621             } else if (Character.isJavaIdentifierStart(ch1)) {
 622                 return scanXMLLiteral();
 623             }
 624         }
 625 
 626         return false;
 627     }
 628 
 629     /**
 630      * Scan over regex literal.
 631      *
 632      * @return True if a regex literal.
 633      */
 634     private boolean scanRegEx() {
 635         assert ch0 == '/';
 636         // Make sure it's not a comment.
 637         if (ch1 != '/' && ch1 != '*') {
 638             // Record beginning of literal.
 639             final int start = position;
 640             // Skip /.
 641             skip(1);
 642             boolean inBrackets = false;
 643 
 644             // Scan for closing /, stopping at end of line.
 645             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
 646                 // Skip over escaped character.
 647                 if (ch0 == '\\') {
 648                     skip(1);
 649                     if (isEOL(ch0)) {
 650                         reset(start);
 651                         return false;
 652                     }
 653                     skip(1);
 654                 } else {
 655                     if (ch0 == '[') {
 656                         inBrackets = true;
 657                     } else if (ch0 == ']') {
 658                         inBrackets = false;
 659                     }
 660 
 661                     // Skip literal character.
 662                     skip(1);
 663                 }
 664             }
 665 
 666             // If regex literal.
 667             if (ch0 == '/') {
 668                 // Skip /.
 669                 skip(1);
 670 
 671                 // Skip over options.
 672                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
 673                     skip(1);
 674                 }
 675 
 676                 // Add regex token.
 677                 add(REGEX, start);
 678                 // Regex literal detected.
 679                 return true;
 680             }
 681 
 682             // False start try again.
 683             reset(start);
 684         }
 685 
 686         // Regex literal not detected.
 687         return false;
 688     }
 689 
 690     /**
 691      * Convert a digit to a integer.  Can't use Character.digit since we are
 692      * restricted to ASCII by the spec.
 693      *
 694      * @param ch   Character to convert.
 695      * @param base Numeric base.
 696      *
 697      * @return The converted digit or -1 if invalid.
 698      */
 699     protected static int convertDigit(final char ch, final int base) {
 700         int digit;
 701 
 702         if ('0' <= ch && ch <= '9') {
 703             digit = ch - '0';
 704         } else if ('A' <= ch && ch <= 'Z') {
 705             digit = ch - 'A' + 10;
 706         } else if ('a' <= ch && ch <= 'z') {
 707             digit = ch - 'a' + 10;
 708         } else {
 709             return -1;
 710         }
 711 
 712         return digit < base ? digit : -1;
 713     }
 714 
 715 
 716     /**
 717      * Get the value of a hexadecimal numeric sequence.
 718      *
 719      * @param length Number of digits.
 720      * @param type   Type of token to report against.
 721      * @return Value of sequence or < 0 if no digits.
 722      */
 723     private int hexSequence(final int length, final TokenType type) {
 724         int value = 0;
 725 
 726         for (int i = 0; i < length; i++) {
 727             final int digit = convertDigit(ch0, 16);
 728 
 729             if (digit == -1) {
 730                 error(Lexer.message("invalid.hex"), type, position, limit);
 731                 return i == 0 ? -1 : value;
 732             }
 733 
 734             value = digit | value << 4;
 735             skip(1);
 736         }
 737 
 738         return value;
 739     }
 740 
 741     /**
 742      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
 743      *
 744      * @return Value of sequence.
 745      */
 746     private int octalSequence() {
 747         int value = 0;
 748 
 749         for (int i = 0; i < 3; i++) {
 750             final int digit = convertDigit(ch0, 8);
 751 
 752             if (digit == -1) {
 753                 break;
 754             }
 755             value = digit | value << 3;
 756             skip(1);
 757 
 758             if (i == 1 && value >= 32) {
 759                 break;
 760             }
 761         }
 762         return value;
 763     }
 764 
 765     /**
 766      * Convert a string to a JavaScript identifier.
 767      *
 768      * @param start  Position in source content.
 769      * @param length Length of token.
 770      * @return Ident string or null if an error.
 771      */
 772     private String valueOfIdent(final int start, final int length) throws RuntimeException {
 773         // Save the current position.
 774         final int savePosition = position;
 775         // End of scan.
 776         final int end = start + length;
 777         // Reset to beginning of content.
 778         reset(start);
 779         // Buffer for recording characters.
 780         final StringBuilder sb = new StringBuilder(length);
 781 
 782         // Scan until end of line or end of file.
 783         while (!atEOF() && position < end && !isEOL(ch0)) {
 784             // If escape character.
 785             if (ch0 == '\\' && ch1 == 'u') {
 786                 skip(2);
 787                 final int ch = hexSequence(4, TokenType.IDENT);
 788                 if (isWhitespace((char)ch)) {
 789                     return null;
 790                 }
 791                 if (ch < 0) {
 792                     sb.append('\\');
 793                     sb.append('u');
 794                 } else {
 795                     sb.append((char)ch);
 796                 }
 797             } else {
 798                 // Add regular character.
 799                 sb.append(ch0);
 800                 skip(1);
 801             }
 802         }
 803 
 804         // Restore position.
 805         reset(savePosition);
 806 
 807         return sb.toString();
 808     }
 809 
 810     /**
 811      * Scan over and identifier or keyword. Handles identifiers containing
 812      * encoded Unicode chars.
 813      *
 814      * Example:
 815      *
 816      * var \u0042 = 44;
 817      */
 818     private void scanIdentifierOrKeyword() {
 819         // Record beginning of identifier.
 820         final int start = position;
 821         // Scan identifier.
 822         final int length = scanIdentifier();
 823         // Check to see if it is a keyword.
 824         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
 825         if (type == FUNCTION && pauseOnFunctionBody) {
 826             pauseOnNextLeftBrace = true;
 827         }
 828         // Add keyword or identifier token.
 829         add(type, start);
 830     }
 831 
 832     /**
 833      * Convert a string to a JavaScript string object.
 834      *
 835      * @param start  Position in source content.
 836      * @param length Length of token.
 837      * @return JavaScript string object.
 838      */
 839     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
 840         // Save the current position.
 841         final int savePosition = position;
 842         // Calculate the end position.
 843         final int end = start + length;
 844         // Reset to beginning of string.
 845         reset(start);
 846 
 847         // Buffer for recording characters.
 848         final StringBuilder sb = new StringBuilder(length);
 849 
 850         // Scan until end of string.
 851         while (position < end) {
 852             // If escape character.
 853             if (ch0 == '\\') {
 854                 skip(1);
 855 
 856                 final char next = ch0;
 857                 final int afterSlash = position;
 858 
 859                 skip(1);
 860 
 861                 // Special characters.
 862                 switch (next) {
 863                 case '0':
 864                 case '1':
 865                 case '2':
 866                 case '3':
 867                 case '4':
 868                 case '5':
 869                 case '6':
 870                 case '7': {
 871                     if (strict) {
 872                         // "\0" itself is allowed in strict mode. Only other 'real'
 873                         // octal escape sequences are not allowed (eg. "\02", "\31").
 874                         // See section 7.8.4 String literals production EscapeSequence
 875                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
 876                             error(Lexer.message("strict.no.octal"), STRING, position, limit);
 877                         }
 878                     }
 879                     reset(afterSlash);
 880                     // Octal sequence.
 881                     final int ch = octalSequence();
 882 
 883                     if (ch < 0) {
 884                         sb.append('\\');
 885                         sb.append('x');
 886                     } else {
 887                         sb.append((char)ch);
 888                     }
 889                     break;
 890                 }
 891                 case 'n':
 892                     sb.append('\n');
 893                     break;
 894                 case 't':
 895                     sb.append('\t');
 896                     break;
 897                 case 'b':
 898                     sb.append('\b');
 899                     break;
 900                 case 'f':
 901                     sb.append('\f');
 902                     break;
 903                 case 'r':
 904                     sb.append('\r');
 905                     break;
 906                 case '\'':
 907                     sb.append('\'');
 908                     break;
 909                 case '\"':
 910                     sb.append('\"');
 911                     break;
 912                 case '\\':
 913                     sb.append('\\');
 914                     break;
 915                 case '\r': // CR | CRLF
 916                     if (ch0 == '\n') {
 917                         skip(1);
 918                     }
 919                     // fall through
 920                 case '\n': // LF
 921                 case '\u2028': // LS
 922                 case '\u2029': // PS
 923                     // continue on the next line, slash-return continues string
 924                     // literal
 925                     break;
 926                 case 'x': {
 927                     // Hex sequence.
 928                     final int ch = hexSequence(2, STRING);
 929 
 930                     if (ch < 0) {
 931                         sb.append('\\');
 932                         sb.append('x');
 933                     } else {
 934                         sb.append((char)ch);
 935                     }
 936                 }
 937                     break;
 938                 case 'u': {
 939                     // Unicode sequence.
 940                     final int ch = hexSequence(4, STRING);
 941 
 942                     if (ch < 0) {
 943                         sb.append('\\');
 944                         sb.append('u');
 945                     } else {
 946                         sb.append((char)ch);
 947                     }
 948                 }
 949                     break;
 950                 case 'v':
 951                     sb.append('\u000B');
 952                     break;
 953                 // All other characters.
 954                 default:
 955                     sb.append(next);
 956                     break;
 957                 }
 958             } else if (ch0 == '\r') {
 959                 // Convert CR-LF or CR to LF line terminator.
 960                 sb.append('\n');
 961                 skip(ch1 == '\n' ? 2 : 1);
 962             } else {
 963                 // Add regular character.
 964                 sb.append(ch0);
 965                 skip(1);
 966             }
 967         }
 968 
 969         // Restore position.
 970         reset(savePosition);
 971 
 972         return sb.toString();
 973     }
 974 
 975     /**
 976      * Scan over a string literal.
 977      * @param add true if we are not just scanning but should actually modify the token stream
 978      */
 979     protected void scanString(final boolean add) {
 980         // Type of string.
 981         TokenType type = STRING;
 982         // Record starting quote.
 983         final char quote = ch0;
 984         // Skip over quote.
 985         skip(1);
 986 
 987         // Record beginning of string content.
 988         final State stringState = saveState();
 989 
 990         // Scan until close quote or end of line.
 991         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
 992             // Skip over escaped character.
 993             if (ch0 == '\\') {
 994                 type = ESCSTRING;
 995                 skip(1);
 996                 if (! isEscapeCharacter(ch0)) {
 997                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);
 998                 }
 999                 if (isEOL(ch0)) {
1000                     // Multiline string literal
1001                     skipEOL(false);
1002                     continue;
1003                 }
1004             }
1005             // Skip literal character.
1006             skip(1);
1007         }
1008 
1009         // If close quote.
1010         if (ch0 == quote) {
1011             // Skip close quote.
1012             skip(1);
1013         } else {
1014             error(Lexer.message("missing.close.quote"), STRING, position, limit);
1015         }
1016 
1017         // If not just scanning.
1018         if (add) {
1019             // Record end of string.
1020             stringState.setLimit(position - 1);
1021 
1022             if (scripting && !stringState.isEmpty()) {
1023                 switch (quote) {
1024                 case '`':
1025                     // Mark the beginning of an exec string.
1026                     add(EXECSTRING, stringState.position, stringState.limit);
1027                     // Frame edit string with left brace.
1028                     add(LBRACE, stringState.position, stringState.position);
1029                     // Process edit string.
1030                     editString(type, stringState);
1031                     // Frame edit string with right brace.
1032                     add(RBRACE, stringState.limit, stringState.limit);
1033                     break;
1034                 case '"':
1035                     // Only edit double quoted strings.
1036                     editString(type, stringState);
1037                     break;
1038                 case '\'':
1039                     // Add string token without editing.
1040                     add(type, stringState.position, stringState.limit);
1041                     break;
1042                 default:
1043                     break;
1044                 }
1045             } else {
1046                 /// Add string token without editing.
1047                 add(type, stringState.position, stringState.limit);
1048             }
1049         }
1050     }
1051 
1052     /**
1053      * Scan over a template string literal.
1054      */
1055     private void scanTemplate() {
1056         assert ch0 == '`';
1057         TokenType type = TEMPLATE;
1058 
1059         // Skip over quote and record beginning of string content.
1060         skip(1);
1061         State stringState = saveState();
1062 
1063         // Scan until close quote
1064         while (!atEOF()) {
1065             // Skip over escaped character.
1066             if (ch0 == '`') {
1067                 skip(1);
1068                 // Record end of string.
1069                 stringState.setLimit(position - 1);
1070                 add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit);
1071                 return;
1072             } else if (ch0 == '$' && ch1 == '{') {
1073                 skip(2);
1074                 stringState.setLimit(position - 2);
1075                 add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit);
1076 
1077                 // scan to RBRACE
1078                 Lexer expressionLexer = new Lexer(this, saveState());
1079                 expressionLexer.templateExpressionOpenBraces = 1;
1080                 expressionLexer.lexify();
1081                 restoreState(expressionLexer.saveState());
1082 
1083                 // scan next middle or tail of the template literal
1084                 assert ch0 == '}';
1085                 type = TEMPLATE_MIDDLE;
1086 
1087                 // Skip over rbrace and record beginning of string content.
1088                 skip(1);
1089                 stringState = saveState();
1090 
1091                 continue;
1092             } else if (ch0 == '\\') {
1093                 skip(1);
1094                 // EscapeSequence
1095                 if (!isEscapeCharacter(ch0)) {
1096                     error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit);
1097                 }
1098                 if (isEOL(ch0)) {
1099                     // LineContinuation
1100                     skipEOL(false);
1101                     continue;
1102                 }
1103             }  else if (isEOL(ch0)) {
1104                 // LineTerminatorSequence
1105                 skipEOL(false);
1106                 continue;
1107             }
1108 
1109             // Skip literal character.
1110             skip(1);
1111         }
1112 
1113         error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
1114     }
1115 
1116     /**
1117      * Is the given character a valid escape char after "\" ?
1118      *
1119      * @param ch character to be checked
1120      * @return if the given character is valid after "\"
1121      */
1122     protected boolean isEscapeCharacter(final char ch) {
1123         return true;
1124     }
1125 
1126     /**
1127      * Convert string to number.
1128      *
1129      * @param valueString  String to convert.
1130      * @param radix        Numeric base.
1131      * @return Converted number.
1132      */
1133     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1134         try {
1135             final long value = Long.parseLong(valueString, radix);
1136             if(value >= MIN_INT_L && value <= MAX_INT_L) {
1137                 return (int)value;
1138             }
1139             return value;
1140         } catch (final NumberFormatException e) {
1141             if (radix == 10) {
1142                 return Double.valueOf(valueString);
1143             }
1144 
1145             double value = 0.0;
1146 
1147             for (int i = 0; i < valueString.length(); i++) {
1148                 final char ch = valueString.charAt(i);
1149                 // Preverified, should always be a valid digit.
1150                 final int digit = convertDigit(ch, radix);
1151                 value *= radix;
1152                 value += digit;
1153             }
1154 
1155             return value;
1156         }
1157     }
1158 
1159     /**
1160      * Scan a number.
1161      */
1162     protected void scanNumber() {
1163         // Record beginning of number.
1164         final int start = position;
1165         // Assume value is a decimal.
1166         TokenType type = DECIMAL;
1167 
1168         // First digit of number.
1169         int digit = convertDigit(ch0, 10);
1170 
1171         // If number begins with 0x.
1172         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1173             // Skip over 0xN.
1174             skip(3);
1175             // Skip over remaining digits.
1176             while (convertDigit(ch0, 16) != -1) {
1177                 skip(1);
1178             }
1179 
1180             type = HEXADECIMAL;
1181         } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) {
1182             // Skip over 0oN.
1183             skip(3);
1184             // Skip over remaining digits.
1185             while (convertDigit(ch0, 8) != -1) {
1186                 skip(1);
1187             }
1188 
1189             type = OCTAL;
1190         } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) {
1191             // Skip over 0bN.
1192             skip(3);
1193             // Skip over remaining digits.
1194             while (convertDigit(ch0, 2) != -1) {
1195                 skip(1);
1196             }
1197 
1198             type = BINARY_NUMBER;
1199         } else {
1200             // Check for possible octal constant.
1201             boolean octal = digit == 0;
1202             // Skip first digit if not leading '.'.
1203             if (digit != -1) {
1204                 skip(1);
1205             }
1206 
1207             // Skip remaining digits.
1208             while ((digit = convertDigit(ch0, 10)) != -1) {
1209                 // Check octal only digits.
1210                 octal = octal && digit < 8;
1211                 // Skip digit.
1212                 skip(1);
1213             }
1214 
1215             if (octal && position - start > 1) {
1216                 type = OCTAL_LEGACY;
1217             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1218                 // Must be a double.
1219                 if (ch0 == '.') {
1220                     // Skip period.
1221                     skip(1);
1222                     // Skip mantissa.
1223                     while (convertDigit(ch0, 10) != -1) {
1224                         skip(1);
1225                     }
1226                 }
1227 
1228                 // Detect exponent.
1229                 if (ch0 == 'E' || ch0 == 'e') {
1230                     // Skip E.
1231                     skip(1);
1232                     // Detect and skip exponent sign.
1233                     if (ch0 == '+' || ch0 == '-') {
1234                         skip(1);
1235                     }
1236                     // Skip exponent.
1237                     while (convertDigit(ch0, 10) != -1) {
1238                         skip(1);
1239                     }
1240                 }
1241 
1242                 type = FLOATING;
1243             }
1244         }
1245 
1246         if (Character.isJavaIdentifierStart(ch0)) {
1247             error(Lexer.message("missing.space.after.number"), type, position, 1);
1248         }
1249 
1250         // Add number token.
1251         add(type, start);
1252     }
1253 
1254     /**
1255      * Convert a regex token to a token object.
1256      *
1257      * @param start  Position in source content.
1258      * @param length Length of regex token.
1259      * @return Regex token object.
1260      */
1261     XMLToken valueOfXML(final int start, final int length) {
1262         return new XMLToken(source.getString(start, length));
1263     }
1264 
1265     /**
1266      * Scan over a XML token.
1267      *
1268      * @return TRUE if is an XML literal.
1269      */
1270     private boolean scanXMLLiteral() {
1271         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1272         if (XML_LITERALS) {
1273             // Record beginning of xml expression.
1274             final int start = position;
1275 
1276             int openCount = 0;
1277 
1278             do {
1279                 if (ch0 == '<') {
1280                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1281                         skip(3);
1282                         openCount--;
1283                     } else if (Character.isJavaIdentifierStart(ch1)) {
1284                         skip(2);
1285                         openCount++;
1286                     } else if (ch1 == '?') {
1287                         skip(2);
1288                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1289                         skip(4);
1290                     } else {
1291                         reset(start);
1292                         return false;
1293                     }
1294 
1295                     while (!atEOF() && ch0 != '>') {
1296                         if (ch0 == '/' && ch1 == '>') {
1297                             openCount--;
1298                             skip(1);
1299                             break;
1300                         } else if (ch0 == '\"' || ch0 == '\'') {
1301                             scanString(false);
1302                         } else {
1303                             skip(1);
1304                         }
1305                     }
1306 
1307                     if (ch0 != '>') {
1308                         reset(start);
1309                         return false;
1310                     }
1311 
1312                     skip(1);
1313                 } else if (atEOF()) {
1314                     reset(start);
1315                     return false;
1316                 } else {
1317                     skip(1);
1318                 }
1319             } while (openCount > 0);
1320 
1321             add(XML, start);
1322             return true;
1323         }
1324 
1325         return false;
1326     }
1327 
1328     /**
1329      * Scan over identifier characters.
1330      *
1331      * @return Length of identifier or zero if none found.
1332      */
1333     private int scanIdentifier() {
1334         final int start = position;
1335 
1336         // Make sure first character is valid start character.
1337         if (ch0 == '\\' && ch1 == 'u') {
1338             skip(2);
1339             final int ch = hexSequence(4, TokenType.IDENT);
1340 
1341             if (!Character.isJavaIdentifierStart(ch)) {
1342                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1343             }
1344         } else if (!Character.isJavaIdentifierStart(ch0)) {
1345             // Not an identifier.
1346             return 0;
1347         }
1348 
1349         // Make sure remaining characters are valid part characters.
1350         while (!atEOF()) {
1351             if (ch0 == '\\' && ch1 == 'u') {
1352                 skip(2);
1353                 final int ch = hexSequence(4, TokenType.IDENT);
1354 
1355                 if (!Character.isJavaIdentifierPart(ch)) {
1356                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1357                 }
1358             } else if (Character.isJavaIdentifierPart(ch0)) {
1359                 skip(1);
1360             } else {
1361                 break;
1362             }
1363         }
1364 
1365         // Length of identifier sequence.
1366         return position - start;
1367     }
1368 
1369     /**
1370      * Compare two identifiers (in content) for equality.
1371      *
1372      * @param aStart  Start of first identifier.
1373      * @param aLength Length of first identifier.
1374      * @param bStart  Start of second identifier.
1375      * @param bLength Length of second identifier.
1376      * @return True if equal.
1377      */
1378     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1379         if (aLength == bLength) {
1380             for (int i = 0; i < aLength; i++) {
1381                 if (content[aStart + i] != content[bStart + i]) {
1382                     return false;
1383                 }
1384             }
1385 
1386             return true;
1387         }
1388 
1389         return false;
1390     }
1391 
1392     /**
1393      * Detect if a line starts with a marker identifier.
1394      *
1395      * @param identStart  Start of identifier.
1396      * @param identLength Length of identifier.
1397      * @return True if detected.
1398      */
1399     private boolean hasHereMarker(final int identStart, final int identLength) {
1400         // Skip any whitespace.
1401         skipWhitespace(false);
1402 
1403         return identifierEqual(identStart, identLength, position, scanIdentifier());
1404     }
1405 
1406     /**
1407      * Lexer to service edit strings.
1408      */
1409     private static class EditStringLexer extends Lexer {
1410         /** Type of string literals to emit. */
1411         final TokenType stringType;
1412 
1413         /*
1414          * Constructor.
1415          */
1416 
1417         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1418             super(lexer, stringState);
1419 
1420             this.stringType = stringType;
1421         }
1422 
1423         /**
1424          * Lexify the contents of the string.
1425          */
1426         @Override
1427         public void lexify() {
1428             // Record start of string position.
1429             int stringStart = position;
1430             // Indicate that the priming first string has not been emitted.
1431             boolean primed = false;
1432 
1433             while (true) {
1434                 // Detect end of content.
1435                 if (atEOF()) {
1436                     break;
1437                 }
1438 
1439                 // Honour escapes (should be well formed.)
1440                 if (ch0 == '\\' && stringType == ESCSTRING) {
1441                     skip(2);
1442 
1443                     continue;
1444                 }
1445 
1446                 // If start of expression.
1447                 if (ch0 == '$' && ch1 == '{') {
1448                     if (!primed || stringStart != position) {
1449                         if (primed) {
1450                             add(ADD, stringStart, stringStart + 1);
1451                         }
1452 
1453                         add(stringType, stringStart, position);
1454                         primed = true;
1455                     }
1456 
1457                     // Skip ${
1458                     skip(2);
1459 
1460                     // Save expression state.
1461                     final State expressionState = saveState();
1462 
1463                     // Start with one open brace.
1464                     int braceCount = 1;
1465 
1466                     // Scan for the rest of the string.
1467                     while (!atEOF()) {
1468                         // If closing brace.
1469                         if (ch0 == '}') {
1470                             // Break only only if matching brace.
1471                             if (--braceCount == 0) {
1472                                 break;
1473                             }
1474                         } else if (ch0 == '{') {
1475                             // Bump up the brace count.
1476                             braceCount++;
1477                         }
1478 
1479                         // Skip to next character.
1480                         skip(1);
1481                     }
1482 
1483                     // If braces don't match then report an error.
1484                     if (braceCount != 0) {
1485                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1486                     }
1487 
1488                     // Mark end of expression.
1489                     expressionState.setLimit(position);
1490                     // Skip closing brace.
1491                     skip(1);
1492 
1493                     // Start next string.
1494                     stringStart = position;
1495 
1496                     // Concatenate expression.
1497                     add(ADD, expressionState.position, expressionState.position + 1);
1498                     add(LPAREN, expressionState.position, expressionState.position + 1);
1499 
1500                     // Scan expression.
1501                     final Lexer lexer = new Lexer(this, expressionState);
1502                     lexer.lexify();
1503 
1504                     // Close out expression parenthesis.
1505                     add(RPAREN, position - 1, position);
1506 
1507                     continue;
1508                 }
1509 
1510                 // Next character in string.
1511                 skip(1);
1512             }
1513 
1514             // If there is any unemitted string portion.
1515             if (stringStart != limit) {
1516                 // Concatenate remaining string.
1517                 if (primed) {
1518                     add(ADD, stringStart, 1);
1519                 }
1520 
1521                 add(stringType, stringStart, limit);
1522             }
1523         }
1524 
1525     }
1526 
1527     /**
1528      * Edit string for nested expressions.
1529      *
1530      * @param stringType  Type of string literals to emit.
1531      * @param stringState State of lexer at start of string.
1532      */
1533     private void editString(final TokenType stringType, final State stringState) {
1534         // Use special lexer to scan string.
1535         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1536         lexer.lexify();
1537 
1538         // Need to keep lexer informed.
1539         last = stringType;
1540     }
1541 
1542     /**
1543      * Scan over a here string.
1544      *
1545      * @return TRUE if is a here string.
1546      */
1547     private boolean scanHereString(final LineInfoReceiver lir) {
1548         assert ch0 == '<' && ch1 == '<';
1549         if (scripting) {
1550             // Record beginning of here string.
1551             final State saved = saveState();
1552 
1553             // << or <<<
1554             final boolean excludeLastEOL = ch2 != '<';
1555 
1556             if (excludeLastEOL) {
1557                 skip(2);
1558             } else {
1559                 skip(3);
1560             }
1561 
1562             // Scan identifier. It might be quoted, indicating that no string editing should take place.
1563             final char quoteChar = ch0;
1564             final boolean noStringEditing = quoteChar == '"' || quoteChar == '\'';
1565             if (noStringEditing) {
1566                 skip(1);
1567             }
1568             final int identStart = position;
1569             final int identLength = scanIdentifier();
1570             if (noStringEditing) {
1571                 if (ch0 != quoteChar) {
1572                     error(Lexer.message("here.non.matching.delimiter"), last, position, position);
1573                     restoreState(saved);
1574                     return false;
1575                 }
1576                 skip(1);
1577             }
1578 
1579             // Check for identifier.
1580             if (identLength == 0) {
1581                 // Treat as shift.
1582                 restoreState(saved);
1583 
1584                 return false;
1585             }
1586 
1587             // Record rest of line.
1588             final State restState = saveState();
1589             // keep line number updated
1590             int lastLine = line;
1591 
1592             skipLine(false);
1593             lastLine++;
1594             int lastLinePosition = position;
1595             restState.setLimit(position);
1596 
1597             // Record beginning of string.
1598             final State stringState = saveState();
1599             int stringEnd = position;
1600 
1601             // Hunt down marker.
1602             while (!atEOF()) {
1603                 // Skip any whitespace.
1604                 skipWhitespace(false);
1605 
1606                 if (hasHereMarker(identStart, identLength)) {
1607                     break;
1608                 }
1609 
1610                 skipLine(false);
1611                 lastLine++;
1612                 lastLinePosition = position;
1613                 stringEnd = position;
1614             }
1615 
1616             // notify last line information
1617             lir.lineInfo(lastLine, lastLinePosition);
1618 
1619             // Record end of string.
1620             stringState.setLimit(stringEnd);
1621 
1622             // If marker is missing.
1623             if (stringState.isEmpty() || atEOF()) {
1624                 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1625                 restoreState(saved);
1626 
1627                 return false;
1628             }
1629 
1630             // Remove last end of line if specified.
1631             if (excludeLastEOL) {
1632                 // Handles \n.
1633                 if (content[stringEnd - 1] == '\n') {
1634                     stringEnd--;
1635                 }
1636 
1637                 // Handles \r and \r\n.
1638                 if (content[stringEnd - 1] == '\r') {
1639                     stringEnd--;
1640                 }
1641 
1642                 // Update end of string.
1643                 stringState.setLimit(stringEnd);
1644             }
1645 
1646             // Edit string if appropriate.
1647             if (!noStringEditing && !stringState.isEmpty()) {
1648                 editString(STRING, stringState);
1649             } else {
1650                 // Add here string.
1651                 add(STRING, stringState.position, stringState.limit);
1652             }
1653 
1654             // Scan rest of original line.
1655             final Lexer restLexer = new Lexer(this, restState);
1656 
1657             restLexer.lexify();
1658 
1659             return true;
1660         }
1661 
1662         return false;
1663     }
1664 
1665     /**
1666      * Breaks source content down into lex units, adding tokens to the token
1667      * stream. The routine scans until the stream buffer is full. Can be called
1668      * repeatedly until EOF is detected.
1669      */
1670     public void lexify() {
1671         while (!stream.isFull() || nested) {
1672             // Skip over whitespace.
1673             skipWhitespace(true);
1674 
1675             // Detect end of file.
1676             if (atEOF()) {
1677                 if (!nested) {
1678                     // Add an EOF token at the end.
1679                     add(EOF, position);
1680                 }
1681 
1682                 break;
1683             }
1684 
1685             // Check for comments. Note that we don't scan for regexp and other literals here as
1686             // we may not have enough context to distinguish them from similar looking operators.
1687             // Instead we break on ambiguous operators below and let the parser decide.
1688             if (ch0 == '/' && skipComments()) {
1689                 continue;
1690             }
1691 
1692             if (scripting && ch0 == '#' && skipComments()) {
1693                 continue;
1694             }
1695 
1696             // TokenType for lookup of delimiter or operator.
1697             TokenType type;
1698 
1699             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1700                 // '.' followed by digit.
1701                 // Scan and add a number.
1702                 scanNumber();
1703             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1704                 if (templateExpressionOpenBraces > 0) {
1705                     if (type == LBRACE) {
1706                         templateExpressionOpenBraces++;
1707                     } else if (type == RBRACE) {
1708                         if (--templateExpressionOpenBraces == 0) {
1709                             break;
1710                         }
1711                     }
1712                 }
1713 
1714                 // Get the number of characters in the token.
1715                 final int typeLength = type.getLength();
1716                 // Skip that many characters.
1717                 skip(typeLength);
1718                 // Add operator token.
1719                 add(type, position - typeLength);
1720                 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1721                 // We break to let the parser decide what it is.
1722                 if (canStartLiteral(type)) {
1723                     break;
1724                 } else if (type == LBRACE && pauseOnNextLeftBrace) {
1725                     pauseOnNextLeftBrace = false;
1726                     break;
1727                 }
1728             } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1729                 // Scan and add identifier or keyword.
1730                 scanIdentifierOrKeyword();
1731             } else if (isStringDelimiter(ch0)) {
1732                 // Scan and add a string.
1733                 scanString(true);
1734             } else if (Character.isDigit(ch0)) {
1735                 // Scan and add a number.
1736                 scanNumber();
1737             } else if (isTemplateDelimiter(ch0) && es6) {
1738                 // Scan and add template in ES6 mode.
1739                 scanTemplate();
1740             } else if (isTemplateDelimiter(ch0) && scripting) {
1741                 // Scan and add an exec string ('`') in scripting mode.
1742                 scanString(true);
1743             } else {
1744                 // Don't recognize this character.
1745                 skip(1);
1746                 add(ERROR, position - 1);
1747             }
1748         }
1749     }
1750 
1751     /**
1752      * Return value of token given its token descriptor.
1753      *
1754      * @param token  Token descriptor.
1755      * @return JavaScript value.
1756      */
1757     Object getValueOf(final long token, final boolean strict) {
1758         final int start = Token.descPosition(token);
1759         final int len   = Token.descLength(token);
1760 
1761         switch (Token.descType(token)) {
1762         case DECIMAL:
1763             return Lexer.valueOf(source.getString(start, len), 10); // number
1764         case HEXADECIMAL:
1765             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1766         case OCTAL_LEGACY:
1767             return Lexer.valueOf(source.getString(start, len), 8); // number
1768         case OCTAL:
1769             return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number
1770         case BINARY_NUMBER:
1771             return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number
1772         case FLOATING:
1773             final String str   = source.getString(start, len);
1774             final double value = Double.valueOf(str);
1775             if (str.indexOf('.') != -1) {
1776                 return value; //number
1777             }
1778             //anything without an explicit decimal point is still subject to a
1779             //"representable as int or long" check. Then the programmer does not
1780             //explicitly code something as a double. For example new Color(int, int, int)
1781             //and new Color(float, float, float) will get ambiguous for cases like
1782             //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1783             //yet we don't want e.g. 1e6 to be a double unnecessarily
1784             if (JSType.isStrictlyRepresentableAsInt(value)) {
1785                 return (int)value;
1786             } else if (JSType.isStrictlyRepresentableAsLong(value)) {
1787                 return (long)value;
1788             }
1789             return value;
1790         case STRING:
1791             return source.getString(start, len); // String
1792         case ESCSTRING:
1793             return valueOfString(start, len, strict); // String
1794         case IDENT:
1795             return valueOfIdent(start, len); // String
1796         case REGEX:
1797             return valueOfPattern(start, len); // RegexToken::LexerToken
1798         case TEMPLATE:
1799         case TEMPLATE_HEAD:
1800         case TEMPLATE_MIDDLE:
1801         case TEMPLATE_TAIL:
1802             return valueOfString(start, len, true); // String
1803         case XML:
1804             return valueOfXML(start, len); // XMLToken::LexerToken
1805         case DIRECTIVE_COMMENT:
1806             return source.getString(start, len);
1807         default:
1808             break;
1809         }
1810 
1811         return null;
1812     }
1813 
1814     /**
1815      * Get the raw string value of a template literal string part.
1816      *
1817      * @param token template string token
1818      * @return raw string
1819      */
1820     public String valueOfRawString(final long token) {
1821         final int start  = Token.descPosition(token);
1822         final int length = Token.descLength(token);
1823 
1824         // Save the current position.
1825         final int savePosition = position;
1826         // Calculate the end position.
1827         final int end = start + length;
1828         // Reset to beginning of string.
1829         reset(start);
1830 
1831         // Buffer for recording characters.
1832         final StringBuilder sb = new StringBuilder(length);
1833 
1834         // Scan until end of string.
1835         while (position < end) {
1836             if (ch0 == '\r') {
1837                 // Convert CR-LF or CR to LF line terminator.
1838                 sb.append('\n');
1839                 skip(ch1 == '\n' ? 2 : 1);
1840             } else {
1841                 // Add regular character.
1842                 sb.append(ch0);
1843                 skip(1);
1844             }
1845         }
1846 
1847         // Restore position.
1848         reset(savePosition);
1849 
1850         return sb.toString();
1851     }
1852 
1853     /**
1854      * Get the correctly localized error message for a given message id format arguments
1855      * @param msgId message id
1856      * @param args  format arguments
1857      * @return message
1858      */
1859     protected static String message(final String msgId, final String... args) {
1860         return ECMAErrors.getMessage("lexer.error." + msgId, args);
1861     }
1862 
1863     /**
1864      * Generate a runtime exception
1865      *
1866      * @param message       error message
1867      * @param type          token type
1868      * @param start         start position of lexed error
1869      * @param length        length of lexed error
1870      * @throws ParserException  unconditionally
1871      */
1872     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1873         final long token     = Token.toDesc(type, start, length);
1874         final int  pos       = Token.descPosition(token);
1875         final int  lineNum   = source.getLine(pos);
1876         final int  columnNum = source.getColumn(pos);
1877         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1878         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1879     }
1880 
1881     /**
1882      * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1883      * This is the abstract superclass
1884      */
1885     public static abstract class LexerToken implements Serializable {
1886         private static final long serialVersionUID = 1L;
1887 
1888         private final String expression;
1889 
1890         /**
1891          * Constructor
1892          * @param expression token expression
1893          */
1894         protected LexerToken(final String expression) {
1895             this.expression = expression;
1896         }
1897 
1898         /**
1899          * Get the expression
1900          * @return expression
1901          */
1902         public String getExpression() {
1903             return expression;
1904         }
1905     }
1906 
1907     /**
1908      * Temporary container for regular expressions.
1909      */
1910     public static class RegexToken extends LexerToken {
1911         private static final long serialVersionUID = 1L;
1912 
1913         /** Options. */
1914         private final String options;
1915 
1916         /**
1917          * Constructor.
1918          *
1919          * @param expression  regexp expression
1920          * @param options     regexp options
1921          */
1922         public RegexToken(final String expression, final String options) {
1923             super(expression);
1924             this.options = options;
1925         }
1926 
1927         /**
1928          * Get regexp options
1929          * @return options
1930          */
1931         public String getOptions() {
1932             return options;
1933         }
1934 
1935         @Override
1936         public String toString() {
1937             return '/' + getExpression() + '/' + options;
1938         }
1939     }
1940 
1941     /**
1942      * Temporary container for XML expression.
1943      */
1944     public static class XMLToken extends LexerToken {
1945         private static final long serialVersionUID = 1L;
1946 
1947         /**
1948          * Constructor.
1949          *
1950          * @param expression  XML expression
1951          */
1952         public XMLToken(final String expression) {
1953             super(expression);
1954         }
1955     }
1956 }