/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.parser;

import static jdk.nashorn.internal.parser.TokenType.COMMENT;
import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
import static jdk.nashorn.internal.parser.TokenType.EOF;
import static jdk.nashorn.internal.parser.TokenType.EOL;
import static jdk.nashorn.internal.parser.TokenType.IDENT;
import java.util.HashMap;
import java.util.Map;
import jdk.nashorn.internal.ir.IdentNode;
import jdk.nashorn.internal.ir.LiteralNode;
import jdk.nashorn.internal.parser.Lexer.LexerToken;
import jdk.nashorn.internal.parser.Lexer.RegexToken;
import jdk.nashorn.internal.runtime.ECMAErrors;
import jdk.nashorn.internal.runtime.ErrorManager;
import jdk.nashorn.internal.runtime.JSErrorType;
import jdk.nashorn.internal.runtime.ParserException;
import jdk.nashorn.internal.runtime.Source;
import jdk.nashorn.internal.runtime.regexp.RegExpFactory;

/**
 * Base class for parsers.
 */
public abstract class AbstractParser {
    /** Source to parse. */
    protected final Source source;

    /** Error manager to report errors. */
    protected final ErrorManager errors;

    /** Stream of lex tokens to parse. */
    protected TokenStream stream;

    /** Index of current token. */
    protected int k;

    /** Previous token - accessible to subclasses. */
    protected long previousToken;

    /** Descriptor of current token. */
    protected long token;

    /** Type of current token. */
    protected TokenType type;

    /** Type of last token. */
    protected TokenType last;

    /** Start position of current token. */
    protected int start;

    /** Finish position of previous token. */
    protected int finish;

    /** Current line number. */
    protected int line;

    /** Position of last EOL + 1. */
    protected int linePosition;

    /** Lexer used to scan source content. */
    protected Lexer lexer;

    /** Is this parser running under strict mode? */
    protected boolean isStrictMode;

    /** What should line numbers be counted from? */
    protected final int lineOffset;

    private final Map<String, String> canonicalNames = new HashMap<>();

    /**
     * Construct a parser.
     *
     * @param source     Source to parse.
     * @param errors     Error reporting manager.
     * @param strict     True if we are in strict mode
     * @param lineOffset Offset from which lines should be counted
     */
    protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) {
        if (source.getLength() > Token.LENGTH_MASK) {
            throw new RuntimeException("Source exceeds size limit of " + Token.LENGTH_MASK + " bytes");
        }
        this.source       = source;
        this.errors       = errors;
        this.k            = -1;
        this.token        = Token.toDesc(EOL, 0, 1);
        this.type         = EOL;
        this.last         = EOL;
        this.isStrictMode = strict;
        this.lineOffset   = lineOffset;
    }

    /**
     * Get the ith token.
     *
     * @param i Index of token.
     *
     * @return  the token
     */
    protected final long getToken(final int i) {
        // Make sure there are enough tokens available.
        while (i > stream.last()) {
            // If we need to buffer more for lookahead.
            if (stream.isFull()) {
                stream.grow();
            }

            // Get more tokens.
            lexer.lexify();
        }

        return stream.get(i);
    }

    /**
     * Return the tokenType of the ith token.
     *
     * @param i Index of token
     *
     * @return the token type
     */
    protected final TokenType T(final int i) {
        // Get token descriptor and extract tokenType.
        return Token.descType(getToken(i));
    }

    /**
     * Seek next token that is not an EOL or comment.
     *
     * @return tokenType of next token.
     */
    protected final TokenType next() {
        do {
            nextOrEOL();
        } while (type == EOL || type == COMMENT);

        return type;
    }

    /**
     * Seek next token or EOL (skipping comments).
     *
     * @return tokenType of next token.
     */
    protected final TokenType nextOrEOL() {
        do {
            nextToken();
            if (type == DIRECTIVE_COMMENT) {
                checkDirectiveComment();
            }
        } while (type == COMMENT || type == DIRECTIVE_COMMENT);

        return type;
    }

    // sourceURL= after directive comment
    private static final String SOURCE_URL_PREFIX = "sourceURL=";

    // currently only @sourceURL=foo supported
    private void checkDirectiveComment() {
        // if already set, ignore this one
        if (source.getExplicitURL() != null) {
            return;
        }

        final String comment = (String) lexer.getValueOf(token, isStrictMode);
        final int len = comment.length();
        // 4 characters for directive comment marker //@\s or //#\s
        if (len > 4 && comment.substring(4).startsWith(SOURCE_URL_PREFIX)) {
            source.setExplicitURL(comment.substring(4 + SOURCE_URL_PREFIX.length()));
        }
    }
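
    // Example: for a script containing the directive comment
    //     //# sourceURL=myScript.js
    // checkDirectiveComment() above skips the 4-character marker ("//# " or "//@ ") and the
    // "sourceURL=" prefix, and records the remaining "myScript.js" through
    // source.setExplicitURL(), giving otherwise anonymous (e.g. eval'ed) code a usable name.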

    /**
     * Seek next token.
     *
     * @return tokenType of next token.
     */
    private TokenType nextToken() {
        // Capture last token type, but ignore comments (which are irrelevant for the purpose of newline detection).
        if (type != COMMENT) {
            last = type;
        }
        if (type != EOF) {

            // Set up next token.
            k++;
            final long lastToken = token;
            previousToken = token;
            token = getToken(k);
            type = Token.descType(token);

            // do this before the start is changed below
            if (last != EOL) {
                finish = start + Token.descLength(lastToken);
            }

            if (type == EOL) {
                line         = Token.descLength(token);
                linePosition = Token.descPosition(token);
            } else {
                start = Token.descPosition(token);
            }

        }

        return type;
    }
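
    // Note: EOL tokens are encoded differently from other tokens. As used above, their length
    // field holds the line number and their position field the offset just past the newline,
    // which is why they update 'line' and 'linePosition' rather than 'start' and 'finish'.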

    /**
     * Get the message string for a message ID and arguments
     *
     * @param msgId The Message ID
     * @param args  The arguments
     *
     * @return The message string
     */
    protected static String message(final String msgId, final String... args) {
        return ECMAErrors.getMessage("parser.error." + msgId, args);
    }

    /**
     * Report an error.
     *
     * @param message    Error message.
     * @param errorToken Offending token.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final String message, final long errorToken) {
        return error(JSErrorType.SYNTAX_ERROR, message, errorToken);
    }

    /**
     * Report an error.
     *
     * @param errorType  The error type
     * @param message    Error message.
     * @param errorToken Offending token.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) {
        final int position  = Token.descPosition(errorToken);
        final int lineNum   = source.getLine(position);
        final int columnNum = source.getColumn(position);
        final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken);
        return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken);
    }

    /**
     * Report an error.
     *
     * @param message Error message.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final String message) {
        return error(JSErrorType.SYNTAX_ERROR, message);
    }

    /**
     * Report an error.
     *
     * @param errorType  The error type
     * @param message    Error message.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final JSErrorType errorType, final String message) {
        // TODO - column needs to account for tabs.
        final int position = Token.descPosition(token);
        final int column = position - linePosition;
        final String formatted = ErrorManager.format(message, source, line, column, token);
        return new ParserException(errorType, formatted, source, line, column, token);
    }

    /**
     * Report a warning to the error manager.
     *
     * @param errorType  The error type of the warning
     * @param message    Warning message.
     * @param errorToken error token
     */
    protected final void warning(final JSErrorType errorType, final String message, final long errorToken) {
        errors.warning(error(errorType, message, errorToken));
    }

    /**
     * Generate 'expected' message.
     *
     * @param expected Expected tokenType.
     *
     * @return the message string
     */
    protected final String expectMessage(final TokenType expected) {
        final String tokenString = Token.toString(source, token);
        String msg;

        if (expected == null) {
            msg = AbstractParser.message("expected.stmt", tokenString);
        } else {
            final String expectedName = expected.getNameOrType();
            msg = AbstractParser.message("expected", expectedName, tokenString);
        }

        return msg;
    }

    /**
     * Check current token and advance to the next token.
     *
     * @param expected Expected tokenType.
     *
     * @throws ParserException on unexpected token type
     */
    protected final void expect(final TokenType expected) throws ParserException {
        expectDontAdvance(expected);
        next();
    }

    /**
     * Check current token, but don't advance to the next token.
     *
     * @param expected Expected tokenType.
     *
     * @throws ParserException on unexpected token type
     */
    protected final void expectDontAdvance(final TokenType expected) throws ParserException {
        if (type != expected) {
            throw error(expectMessage(expected));
        }
    }

    /**
     * Check next token, get its value and advance.
     *
     * @param  expected Expected tokenType.
     * @return The JavaScript value of the token
     * @throws ParserException on unexpected token type
     */
    protected final Object expectValue(final TokenType expected) throws ParserException {
        if (type != expected) {
            throw error(expectMessage(expected));
        }

        final Object value = getValue();

        next();

        return value;
    }

    /**
     * Get the value of the current token.
     *
     * @return JavaScript value of the token.
     */
    protected final Object getValue() {
        return getValue(token);
    }

    /**
     * Get the value of a specific token
     *
     * @param valueToken the token
     *
     * @return JavaScript value of the token
     */
    protected final Object getValue(final long valueToken) {
        try {
            return lexer.getValueOf(valueToken, isStrictMode);
        } catch (final ParserException e) {
            errors.error(e);
        }

        return null;
    }

    /**
     * Certain future reserved words can be used as identifiers in
     * non-strict mode. Check if the current token is one such.
     *
     * @return true if non strict mode identifier
     */
    protected final boolean isNonStrictModeIdent() {
        return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT;
    }

    /**
     * Get ident.
     *
     * @return Ident node.
     */
    protected final IdentNode getIdent() {
        // Capture IDENT token.
        long identToken = token;

        if (isNonStrictModeIdent()) {
            // Fake out identifier.
            identToken = Token.recast(token, IDENT);
            // Get IDENT.
            final String ident = (String)getValue(identToken);

            next();

            // Create IDENT node.
            return createIdentNode(identToken, finish, ident).setIsFutureStrictName();
        }

        // Get IDENT.
        final String ident = (String)expectValue(IDENT);
        if (ident == null) {
            return null;
        }
        // Create IDENT node.
        return createIdentNode(identToken, finish, ident);
    }

    /**
     * Creates a new {@link IdentNode} as if invoked with the {@link IdentNode#IdentNode(long, int, String)
     * constructor}, but makes sure that the {@code name} is deduplicated within this parse job.
     * @param identToken the token for the new {@code IdentNode}
     * @param identFinish the finish for the new {@code IdentNode}
     * @param name the name for the new {@code IdentNode}. It will be de-duplicated.
     * @return a newly constructed {@code IdentNode} with the specified token, finish, and name; the name will
     * be deduplicated.
     */
    protected IdentNode createIdentNode(final long identToken, final int identFinish, final String name) {
        final String existingName = canonicalNames.putIfAbsent(name, name);
        final String canonicalName = existingName != null ? existingName : name;
        return new IdentNode(identToken, identFinish, canonicalName);
    }
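
    // Example: if the identifier "x" occurs many times in the parsed program, every IdentNode
    // created here ends up holding the same canonical String instance (the first one seen),
    // rather than a separate copy per occurrence.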

    /**
     * Check if the current token is an identifier name
     *
     * @return true if current token is an identifier name
     */
    protected final boolean isIdentifierName() {
        final TokenKind kind = type.getKind();
        if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) {
            return true;
        }

        // only literals allowed are null, false and true
        if (kind == TokenKind.LITERAL) {
            switch (type) {
                case FALSE:
                case NULL:
                case TRUE:
                    return true;
                default:
                    return false;
            }
        }

        // Fake out identifier.
        final long identToken = Token.recast(token, IDENT);
        // Get IDENT.
        final String ident = (String)getValue(identToken);
        return !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0));
    }
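
    // This is what allows reserved words wherever the grammar asks for an IdentifierName rather
    // than an Identifier: for example, "obj.delete" and "{ new: 1 }" are valid ECMAScript even
    // though "delete" and "new" are keywords.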

    /**
     * Create an IdentNode from the current token
     *
     * @return an IdentNode representing the current token
     */
    protected final IdentNode getIdentifierName() {
        if (type == IDENT) {
            return getIdent();
        } else if (isIdentifierName()) {
            // Fake out identifier.
            final long identToken = Token.recast(token, IDENT);
            // Get IDENT.
            final String ident = (String)getValue(identToken);
            next();
            // Create IDENT node.
            return createIdentNode(identToken, finish, ident);
        } else {
            expect(IDENT);
            return null;
        }
    }

    /**
     * Create a LiteralNode from the current token
     *
     * @return LiteralNode representing the current token
     * @throws ParserException if any literal fails to parse
     */
    protected final LiteralNode<?> getLiteral() throws ParserException {
        // Capture LITERAL token.
        final long literalToken = token;

        // Create literal node.
        final Object value = getValue();
        // Advance to have a correct finish
        next();

        LiteralNode<?> node = null;

        if (value == null) {
            node = LiteralNode.newInstance(literalToken, finish);
        } else if (value instanceof Number) {
            node = LiteralNode.newInstance(literalToken, finish, (Number)value);
        } else if (value instanceof String) {
            node = LiteralNode.newInstance(literalToken, finish, (String)value);
        } else if (value instanceof LexerToken) {
            if (value instanceof RegexToken) {
                final RegexToken regex = (RegexToken)value;
                try {
                    RegExpFactory.validate(regex.getExpression(), regex.getOptions());
                } catch (final ParserException e) {
                    throw error(e.getMessage());
                }
            }
            node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value);
        } else {
            assert false : "unknown type for LiteralNode: " + value.getClass();
        }

        return node;
    }
}