1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
  29 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
  30 import static jdk.nashorn.internal.parser.TokenType.EOF;
  31 import static jdk.nashorn.internal.parser.TokenType.EOL;
  32 import static jdk.nashorn.internal.parser.TokenType.IDENT;
  33 import java.util.HashMap;
  34 import java.util.Map;
  35 import jdk.nashorn.internal.ir.IdentNode;
  36 import jdk.nashorn.internal.ir.LiteralNode;
  37 import jdk.nashorn.internal.parser.Lexer.LexerToken;
  38 import jdk.nashorn.internal.parser.Lexer.RegexToken;
  39 import jdk.nashorn.internal.runtime.ECMAErrors;
  40 import jdk.nashorn.internal.runtime.ErrorManager;
  41 import jdk.nashorn.internal.runtime.JSErrorType;
  42 import jdk.nashorn.internal.runtime.ParserException;
  43 import jdk.nashorn.internal.runtime.Source;
  44 import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
  45 
  46 /**
  47  * Base class for parsers.
  48  */
  49 public abstract class AbstractParser {
  50     /** Source to parse. */
  51     protected final Source source;
  52 
  53     /** Error manager to report errors. */
  54     protected final ErrorManager errors;
  55 
  56     /** Stream of lex tokens to parse. */
  57     protected TokenStream stream;
  58 
  59     /** Index of current token. */
  60     protected int k;
  61 
  62     /** Previous token - accessible to sub classes */
  63     protected long previousToken;
  64 
  65     /** Descriptor of current token. */
  66     protected long token;
  67 
  68     /** Type of current token. */
  69     protected TokenType type;
  70 
  71     /** Type of last token. */
  72     protected TokenType last;
  73 
  74     /** Start position of current token. */
  75     protected int start;
  76 
  77     /** Finish position of previous token. */
  78     protected int finish;
  79 
  80     /** Current line number. */
  81     protected int line;
  82 
  83     /** Position of last EOL + 1. */
  84     protected int linePosition;
  85 
  86     /** Lexer used to scan source content. */
  87     protected Lexer lexer;
  88 
  89     /** Is this parser running under strict mode? */
  90     protected boolean isStrictMode;
  91 
  92     /** What should line numbers be counted from? */
  93     protected final int lineOffset;
  94 
  95     private final Map<String, String> canonicalNames = new HashMap<>();
  96 
  97     /**
  98      * Construct a parser.
  99      *
 100      * @param source     Source to parse.
 101      * @param errors     Error reporting manager.
 102      * @param strict     True if we are in strict mode
 103      * @param lineOffset Offset from which lines should be counted
 104      */
 105     protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) {
 106         this.source       = source;
 107         this.errors       = errors;
 108         this.k            = -1;
 109         this.token        = Token.toDesc(EOL, 0, 1);
 110         this.type         = EOL;
 111         this.last         = EOL;
 112         this.isStrictMode = strict;
 113         this.lineOffset   = lineOffset;
 114     }
 115 
 116     /**
 117      * Get the ith token.
 118      *
 119      * @param i Index of token.
 120      *
 121      * @return  the token
 122      */
 123     protected final long getToken(final int i) {
 124         // Make sure there are enough tokens available.
 125         while (i > stream.last()) {
 126             // If we need to buffer more for lookahead.
 127             if (stream.isFull()) {
 128                 stream.grow();
 129             }
 130 
 131             // Get more tokens.
 132             lexer.lexify();
 133         }
 134 
 135         return stream.get(i);
 136     }
 137 
 138     /**
 139      * Return the tokenType of the ith token.
 140      *
 141      * @param i Index of token
 142      *
 143      * @return the token type
 144      */
 145     protected final TokenType T(final int i) {
 146         // Get token descriptor and extract tokenType.
 147         return Token.descType(getToken(i));
 148     }
 149 
 150     /**
 151      * Seek next token that is not an EOL or comment.
 152      *
 153      * @return tokenType of next token.
 154      */
 155     protected final TokenType next() {
 156         do {
 157             nextOrEOL();
 158         } while (type == EOL || type == COMMENT);
 159 
 160         return type;
 161     }
 162 
 163     /**
 164      * Seek next token or EOL (skipping comments.)
 165      *
 166      * @return tokenType of next token.
 167      */
 168     protected final TokenType nextOrEOL() {
 169         do {
 170             nextToken();
 171             if (type == DIRECTIVE_COMMENT) {
 172                 checkDirectiveComment();
 173             }
 174         } while (type == COMMENT || type == DIRECTIVE_COMMENT);
 175 
 176         return type;
 177     }
 178 
 179     // sourceURL= after directive comment
 180     private static final String SOURCE_URL_PREFIX = "sourceURL=";
 181 
 182     // currently only @sourceURL=foo supported
 183     private void checkDirectiveComment() {
 184         // if already set, ignore this one
 185         if (source.getExplicitURL() != null) {
 186             return;
 187         }
 188 
 189         final String comment = (String) lexer.getValueOf(token, isStrictMode);
 190         final int len = comment.length();
 191         // 4 characters for directive comment marker //@\s or //#\s
 192         if (len > 4 && comment.substring(4).startsWith(SOURCE_URL_PREFIX)) {
 193             source.setExplicitURL(comment.substring(4 + SOURCE_URL_PREFIX.length()));
 194         }
 195     }
 196 
 197     /**
 198      * Seek next token.
 199      *
 200      * @return tokenType of next token.
 201      */
 202     private TokenType nextToken() {
 203         // Capture last token tokenType.
 204         last = type;
 205         if (type != EOF) {
 206 
 207             // Set up next token.
 208             k++;
 209             final long lastToken = token;
 210             previousToken = token;
 211             token = getToken(k);
 212             type = Token.descType(token);
 213 
 214             // do this before the start is changed below
 215             if (last != EOL) {
 216                 finish = start + Token.descLength(lastToken);
 217             }
 218 
 219             if (type == EOL) {
 220                 line         = Token.descLength(token);
 221                 linePosition = Token.descPosition(token);
 222             } else {
 223                 start = Token.descPosition(token);
 224             }
 225 
 226         }
 227 
 228         return type;
 229     }
 230 
 231     /**
 232      * Get the message string for a message ID and arguments
 233      *
 234      * @param msgId The Message ID
 235      * @param args  The arguments
 236      *
 237      * @return The message string
 238      */
 239     protected static String message(final String msgId, final String... args) {
 240         return ECMAErrors.getMessage("parser.error." + msgId, args);
 241     }
 242 
 243     /**
 244      * Report an error.
 245      *
 246      * @param message    Error message.
 247      * @param errorToken Offending token.
 248      * @return ParserException upon failure. Caller should throw and not ignore
 249      */
 250     protected final ParserException error(final String message, final long errorToken) {
 251         return error(JSErrorType.SYNTAX_ERROR, message, errorToken);
 252     }
 253 
 254     /**
 255      * Report an error.
 256      *
 257      * @param errorType  The error type
 258      * @param message    Error message.
 259      * @param errorToken Offending token.
 260      * @return ParserException upon failure. Caller should throw and not ignore
 261      */
 262     protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) {
 263         final int position  = Token.descPosition(errorToken);
 264         final int lineNum   = source.getLine(position);
 265         final int columnNum = source.getColumn(position);
 266         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken);
 267         return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken);
 268     }
 269 
 270     /**
 271      * Report an error.
 272      *
 273      * @param message Error message.
 274      * @return ParserException upon failure. Caller should throw and not ignore
 275      */
 276     protected final ParserException error(final String message) {
 277         return error(JSErrorType.SYNTAX_ERROR, message);
 278     }
 279 
 280     /**
 281      * Report an error.
 282      *
 283      * @param errorType  The error type
 284      * @param message    Error message.
 285      * @return ParserException upon failure. Caller should throw and not ignore
 286      */
 287     protected final ParserException error(final JSErrorType errorType, final String message) {
 288         // TODO - column needs to account for tabs.
 289         final int position = Token.descPosition(token);
 290         final int column = position - linePosition;
 291         final String formatted = ErrorManager.format(message, source, line, column, token);
 292         return new ParserException(errorType, formatted, source, line, column, token);
 293     }
 294 
 295     /**
 296      * Report a warning to the error manager.
 297      *
 298      * @param errorType  The error type of the warning
 299      * @param message    Warning message.
 300      * @param errorToken error token
 301      */
 302     protected final void warning(final JSErrorType errorType, final String message, final long errorToken) {
 303         errors.warning(error(errorType, message, errorToken));
 304     }
 305 
 306     /**
 307      * Generate 'expected' message.
 308      *
 309      * @param expected Expected tokenType.
 310      *
 311      * @return the message string
 312      */
 313     protected final String expectMessage(final TokenType expected) {
 314         final String tokenString = Token.toString(source, token);
 315         String msg;
 316 
 317         if (expected == null) {
 318             msg = AbstractParser.message("expected.stmt", tokenString);
 319         } else {
 320             final String expectedName = expected.getNameOrType();
 321             msg = AbstractParser.message("expected", expectedName, tokenString);
 322         }
 323 
 324         return msg;
 325     }
 326 
 327     /**
 328      * Check current token and advance to the next token.
 329      *
 330      * @param expected Expected tokenType.
 331      *
 332      * @throws ParserException on unexpected token type
 333      */
 334     protected final void expect(final TokenType expected) throws ParserException {
 335         expectDontAdvance(expected);
 336         next();
 337     }
 338 
 339     /**
 340      * Check current token, but don't advance to the next token.
 341      *
 342      * @param expected Expected tokenType.
 343      *
 344      * @throws ParserException on unexpected token type
 345      */
 346     protected final void expectDontAdvance(final TokenType expected) throws ParserException {
 347         if (type != expected) {
 348             throw error(expectMessage(expected));
 349         }
 350     }
 351 
 352     /**
 353      * Check next token, get its value and advance.
 354      *
 355      * @param  expected Expected tokenType.
 356      * @return The JavaScript value of the token
 357      * @throws ParserException on unexpected token type
 358      */
 359     protected final Object expectValue(final TokenType expected) throws ParserException {
 360         if (type != expected) {
 361             throw error(expectMessage(expected));
 362         }
 363 
 364         final Object value = getValue();
 365 
 366         next();
 367 
 368         return value;
 369     }
 370 
 371     /**
 372      * Get the value of the current token.
 373      *
 374      * @return JavaScript value of the token.
 375      */
 376     protected final Object getValue() {
 377         return getValue(token);
 378     }
 379 
 380     /**
 381      * Get the value of a specific token
 382      *
 383      * @param valueToken the token
 384      *
 385      * @return JavaScript value of the token
 386      */
 387     protected final Object getValue(final long valueToken) {
 388         try {
 389             return lexer.getValueOf(valueToken, isStrictMode);
 390         } catch (final ParserException e) {
 391             errors.error(e);
 392         }
 393 
 394         return null;
 395     }
 396 
 397     /**
 398      * Certain future reserved words can be used as identifiers in
 399      * non-strict mode. Check if the current token is one such.
 400      *
 401      * @return true if non strict mode identifier
 402      */
 403     protected final boolean isNonStrictModeIdent() {
 404         return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT;
 405     }
 406 
 407     /**
 408      * Get ident.
 409      *
 410      * @return Ident node.
 411      */
 412     protected final IdentNode getIdent() {
 413         // Capture IDENT token.
 414         long identToken = token;
 415 
 416         if (isNonStrictModeIdent()) {
 417             // Fake out identifier.
 418             identToken = Token.recast(token, IDENT);
 419             // Get IDENT.
 420             final String ident = (String)getValue(identToken);
 421 
 422             next();
 423 
 424             // Create IDENT node.
 425             return createIdentNode(identToken, finish, ident).setIsFutureStrictName();
 426         }
 427 
 428         // Get IDENT.
 429         final String ident = (String)expectValue(IDENT);
 430         if (ident == null) {
 431             return null;
 432         }
 433         // Create IDENT node.
 434         return createIdentNode(identToken, finish, ident);
 435     }
 436 
 437     /**
 438      * Creates a new {@link IdentNode} as if invoked with a {@link IdentNode#IdentNode(long, int, String)
 439      * constructor} but making sure that the {@code name} is deduplicated within this parse job.
 440      * @param identToken the token for the new {@code IdentNode}
 441      * @param identFinish the finish for the new {@code IdentNode}
 442      * @param name the name for the new {@code IdentNode}. It will be de-duplicated.
 443      * @return a newly constructed {@code IdentNode} with the specified token, finish, and name; the name will
 444      * be deduplicated.
 445      */
 446     protected IdentNode createIdentNode(final long identToken, final int identFinish, final String name) {
 447         final String existingName = canonicalNames.putIfAbsent(name, name);
 448         final String canonicalName = existingName != null ? existingName : name;
 449         return new IdentNode(identToken, identFinish, canonicalName);
 450     }
 451 
 452     /**
 453      * Check if current token is in identifier name
 454      *
 455      * @return true if current token is an identifier name
 456      */
 457     protected final boolean isIdentifierName() {
 458         final TokenKind kind = type.getKind();
 459         if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) {
 460             return true;
 461         }













 462         // Fake out identifier.
 463         final long identToken = Token.recast(token, IDENT);
 464         // Get IDENT.
 465         final String ident = (String)getValue(identToken);
 466         return !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0));
 467     }
 468 
 469     /**
 470      * Create an IdentNode from the current token
 471      *
 472      * @return an IdentNode representing the current token
 473      */
 474     protected final IdentNode getIdentifierName() {
 475         if (type == IDENT) {
 476             return getIdent();
 477         } else if (isIdentifierName()) {
 478             // Fake out identifier.
 479             final long identToken = Token.recast(token, IDENT);
 480             // Get IDENT.
 481             final String ident = (String)getValue(identToken);
 482             next();
 483             // Create IDENT node.
 484             return createIdentNode(identToken, finish, ident);
 485         } else {
 486             expect(IDENT);
 487             return null;
 488         }
 489     }
 490 
 491     /**
 492      * Create a LiteralNode from the current token
 493      *
 494      * @return LiteralNode representing the current token
 495      * @throws ParserException if any literals fails to parse
 496      */
 497     protected final LiteralNode<?> getLiteral() throws ParserException {
 498         // Capture LITERAL token.
 499         final long literalToken = token;
 500 
 501         // Create literal node.
 502         final Object value = getValue();
 503         // Advance to have a correct finish
 504         next();
 505 
 506         LiteralNode<?> node = null;
 507 
 508         if (value == null) {
 509             node = LiteralNode.newInstance(literalToken, finish);
 510         } else if (value instanceof Number) {
 511             node = LiteralNode.newInstance(literalToken, finish, (Number)value);
 512         } else if (value instanceof String) {
 513             node = LiteralNode.newInstance(literalToken, finish, (String)value);
 514         } else if (value instanceof LexerToken) {
 515             if (value instanceof RegexToken) {
 516                 final RegexToken regex = (RegexToken)value;
 517                 try {
 518                     RegExpFactory.validate(regex.getExpression(), regex.getOptions());
 519                 } catch (final ParserException e) {
 520                     throw error(e.getMessage());
 521                 }
 522             }
 523             node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value);
 524         } else {
 525             assert false : "unknown type for LiteralNode: " + value.getClass();
 526         }
 527 
 528         return node;
 529     }
 530 }
--- EOF ---