1 /* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.nashorn.internal.parser; 27 28 import static jdk.nashorn.internal.parser.TokenType.COMMENT; 29 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT; 30 import static jdk.nashorn.internal.parser.TokenType.EOF; 31 import static jdk.nashorn.internal.parser.TokenType.EOL; 32 import static jdk.nashorn.internal.parser.TokenType.IDENT; 33 import java.util.HashMap; 34 import java.util.Map; 35 import jdk.nashorn.internal.ir.IdentNode; 36 import jdk.nashorn.internal.ir.LiteralNode; 37 import jdk.nashorn.internal.parser.Lexer.LexerToken; 38 import jdk.nashorn.internal.parser.Lexer.RegexToken; 39 import jdk.nashorn.internal.runtime.ECMAErrors; 40 import jdk.nashorn.internal.runtime.ErrorManager; 41 import jdk.nashorn.internal.runtime.JSErrorType; 42 import jdk.nashorn.internal.runtime.ParserException; 43 import jdk.nashorn.internal.runtime.Source; 44 import jdk.nashorn.internal.runtime.regexp.RegExpFactory; 45 46 /** 47 * Base class for parsers. 48 */ 49 public abstract class AbstractParser { 50 /** Source to parse. */ 51 protected final Source source; 52 53 /** Error manager to report errors. */ 54 protected final ErrorManager errors; 55 56 /** Stream of lex tokens to parse. */ 57 protected TokenStream stream; 58 59 /** Index of current token. */ 60 protected int k; 61 62 /** Previous token - accessible to sub classes */ 63 protected long previousToken; 64 65 /** Descriptor of current token. */ 66 protected long token; 67 68 /** Type of current token. */ 69 protected TokenType type; 70 71 /** Type of last token. */ 72 protected TokenType last; 73 74 /** Start position of current token. */ 75 protected int start; 76 77 /** Finish position of previous token. */ 78 protected int finish; 79 80 /** Current line number. */ 81 protected int line; 82 83 /** Position of last EOL + 1. */ 84 protected int linePosition; 85 86 /** Lexer used to scan source content. */ 87 protected Lexer lexer; 88 89 /** Is this parser running under strict mode? */ 90 protected boolean isStrictMode; 91 92 /** What should line numbers be counted from? */ 93 protected final int lineOffset; 94 95 private final Map<String, String> canonicalNames = new HashMap<>(); 96 97 /** 98 * Construct a parser. 99 * 100 * @param source Source to parse. 101 * @param errors Error reporting manager. 102 * @param strict True if we are in strict mode 103 * @param lineOffset Offset from which lines should be counted 104 */ 105 protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) { 106 this.source = source; 107 this.errors = errors; 108 this.k = -1; 109 this.token = Token.toDesc(EOL, 0, 1); 110 this.type = EOL; 111 this.last = EOL; 112 this.isStrictMode = strict; 113 this.lineOffset = lineOffset; 114 } 115 116 /** 117 * Get the ith token. 118 * 119 * @param i Index of token. 120 * 121 * @return the token 122 */ 123 protected final long getToken(final int i) { 124 // Make sure there are enough tokens available. 125 while (i > stream.last()) { 126 // If we need to buffer more for lookahead. 127 if (stream.isFull()) { 128 stream.grow(); 129 } 130 131 // Get more tokens. 132 lexer.lexify(); 133 } 134 135 return stream.get(i); 136 } 137 138 /** 139 * Return the tokenType of the ith token. 140 * 141 * @param i Index of token 142 * 143 * @return the token type 144 */ 145 protected final TokenType T(final int i) { 146 // Get token descriptor and extract tokenType. 147 return Token.descType(getToken(i)); 148 } 149 150 /** 151 * Seek next token that is not an EOL or comment. 152 * 153 * @return tokenType of next token. 154 */ 155 protected final TokenType next() { 156 do { 157 nextOrEOL(); 158 } while (type == EOL || type == COMMENT); 159 160 return type; 161 } 162 163 /** 164 * Seek next token or EOL (skipping comments.) 165 * 166 * @return tokenType of next token. 167 */ 168 protected final TokenType nextOrEOL() { 169 do { 170 nextToken(); 171 if (type == DIRECTIVE_COMMENT) { 172 checkDirectiveComment(); 173 } 174 } while (type == COMMENT || type == DIRECTIVE_COMMENT); 175 176 return type; 177 } 178 179 // sourceURL= after directive comment 180 private static final String SOURCE_URL_PREFIX = "sourceURL="; 181 182 // currently only @sourceURL=foo supported 183 private void checkDirectiveComment() { 184 // if already set, ignore this one 185 if (source.getExplicitURL() != null) { 186 return; 187 } 188 189 final String comment = (String) lexer.getValueOf(token, isStrictMode); 190 final int len = comment.length(); 191 // 4 characters for directive comment marker //@\s or //#\s 192 if (len > 4 && comment.substring(4).startsWith(SOURCE_URL_PREFIX)) { 193 source.setExplicitURL(comment.substring(4 + SOURCE_URL_PREFIX.length())); 194 } 195 } 196 197 /** 198 * Seek next token. 199 * 200 * @return tokenType of next token. 201 */ 202 private TokenType nextToken() { 203 // Capture last token tokenType. 204 last = type; 205 if (type != EOF) { 206 207 // Set up next token. 208 k++; 209 final long lastToken = token; 210 previousToken = token; 211 token = getToken(k); 212 type = Token.descType(token); 213 214 // do this before the start is changed below 215 if (last != EOL) { 216 finish = start + Token.descLength(lastToken); 217 } 218 219 if (type == EOL) { 220 line = Token.descLength(token); 221 linePosition = Token.descPosition(token); 222 } else { 223 start = Token.descPosition(token); 224 } 225 226 } 227 228 return type; 229 } 230 231 /** 232 * Get the message string for a message ID and arguments 233 * 234 * @param msgId The Message ID 235 * @param args The arguments 236 * 237 * @return The message string 238 */ 239 protected static String message(final String msgId, final String... args) { 240 return ECMAErrors.getMessage("parser.error." + msgId, args); 241 } 242 243 /** 244 * Report an error. 245 * 246 * @param message Error message. 247 * @param errorToken Offending token. 248 * @return ParserException upon failure. Caller should throw and not ignore 249 */ 250 protected final ParserException error(final String message, final long errorToken) { 251 return error(JSErrorType.SYNTAX_ERROR, message, errorToken); 252 } 253 254 /** 255 * Report an error. 256 * 257 * @param errorType The error type 258 * @param message Error message. 259 * @param errorToken Offending token. 260 * @return ParserException upon failure. Caller should throw and not ignore 261 */ 262 protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) { 263 final int position = Token.descPosition(errorToken); 264 final int lineNum = source.getLine(position); 265 final int columnNum = source.getColumn(position); 266 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken); 267 return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken); 268 } 269 270 /** 271 * Report an error. 272 * 273 * @param message Error message. 274 * @return ParserException upon failure. Caller should throw and not ignore 275 */ 276 protected final ParserException error(final String message) { 277 return error(JSErrorType.SYNTAX_ERROR, message); 278 } 279 280 /** 281 * Report an error. 282 * 283 * @param errorType The error type 284 * @param message Error message. 285 * @return ParserException upon failure. Caller should throw and not ignore 286 */ 287 protected final ParserException error(final JSErrorType errorType, final String message) { 288 // TODO - column needs to account for tabs. 289 final int position = Token.descPosition(token); 290 final int column = position - linePosition; 291 final String formatted = ErrorManager.format(message, source, line, column, token); 292 return new ParserException(errorType, formatted, source, line, column, token); 293 } 294 295 /** 296 * Report a warning to the error manager. 297 * 298 * @param errorType The error type of the warning 299 * @param message Warning message. 300 * @param errorToken error token 301 */ 302 protected final void warning(final JSErrorType errorType, final String message, final long errorToken) { 303 errors.warning(error(errorType, message, errorToken)); 304 } 305 306 /** 307 * Generate 'expected' message. 308 * 309 * @param expected Expected tokenType. 310 * 311 * @return the message string 312 */ 313 protected final String expectMessage(final TokenType expected) { 314 final String tokenString = Token.toString(source, token); 315 String msg; 316 317 if (expected == null) { 318 msg = AbstractParser.message("expected.stmt", tokenString); 319 } else { 320 final String expectedName = expected.getNameOrType(); 321 msg = AbstractParser.message("expected", expectedName, tokenString); 322 } 323 324 return msg; 325 } 326 327 /** 328 * Check current token and advance to the next token. 329 * 330 * @param expected Expected tokenType. 331 * 332 * @throws ParserException on unexpected token type 333 */ 334 protected final void expect(final TokenType expected) throws ParserException { 335 expectDontAdvance(expected); 336 next(); 337 } 338 339 /** 340 * Check current token, but don't advance to the next token. 341 * 342 * @param expected Expected tokenType. 343 * 344 * @throws ParserException on unexpected token type 345 */ 346 protected final void expectDontAdvance(final TokenType expected) throws ParserException { 347 if (type != expected) { 348 throw error(expectMessage(expected)); 349 } 350 } 351 352 /** 353 * Check next token, get its value and advance. 354 * 355 * @param expected Expected tokenType. 356 * @return The JavaScript value of the token 357 * @throws ParserException on unexpected token type 358 */ 359 protected final Object expectValue(final TokenType expected) throws ParserException { 360 if (type != expected) { 361 throw error(expectMessage(expected)); 362 } 363 364 final Object value = getValue(); 365 366 next(); 367 368 return value; 369 } 370 371 /** 372 * Get the value of the current token. 373 * 374 * @return JavaScript value of the token. 375 */ 376 protected final Object getValue() { 377 return getValue(token); 378 } 379 380 /** 381 * Get the value of a specific token 382 * 383 * @param valueToken the token 384 * 385 * @return JavaScript value of the token 386 */ 387 protected final Object getValue(final long valueToken) { 388 try { 389 return lexer.getValueOf(valueToken, isStrictMode); 390 } catch (final ParserException e) { 391 errors.error(e); 392 } 393 394 return null; 395 } 396 397 /** 398 * Certain future reserved words can be used as identifiers in 399 * non-strict mode. Check if the current token is one such. 400 * 401 * @return true if non strict mode identifier 402 */ 403 protected final boolean isNonStrictModeIdent() { 404 return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT; 405 } 406 407 /** 408 * Get ident. 409 * 410 * @return Ident node. 411 */ 412 protected final IdentNode getIdent() { 413 // Capture IDENT token. 414 long identToken = token; 415 416 if (isNonStrictModeIdent()) { 417 // Fake out identifier. 418 identToken = Token.recast(token, IDENT); 419 // Get IDENT. 420 final String ident = (String)getValue(identToken); 421 422 next(); 423 424 // Create IDENT node. 425 return createIdentNode(identToken, finish, ident).setIsFutureStrictName(); 426 } 427 428 // Get IDENT. 429 final String ident = (String)expectValue(IDENT); 430 if (ident == null) { 431 return null; 432 } 433 // Create IDENT node. 434 return createIdentNode(identToken, finish, ident); 435 } 436 437 /** 438 * Creates a new {@link IdentNode} as if invoked with a {@link IdentNode#IdentNode(long, int, String) 439 * constructor} but making sure that the {@code name} is deduplicated within this parse job. 440 * @param identToken the token for the new {@code IdentNode} 441 * @param identFinish the finish for the new {@code IdentNode} 442 * @param name the name for the new {@code IdentNode}. It will be de-duplicated. 443 * @return a newly constructed {@code IdentNode} with the specified token, finish, and name; the name will 444 * be deduplicated. 445 */ 446 protected IdentNode createIdentNode(final long identToken, final int identFinish, final String name) { 447 final String existingName = canonicalNames.putIfAbsent(name, name); 448 final String canonicalName = existingName != null ? existingName : name; 449 return new IdentNode(identToken, identFinish, canonicalName); 450 } 451 452 /** 453 * Check if current token is in identifier name 454 * 455 * @return true if current token is an identifier name 456 */ 457 protected final boolean isIdentifierName() { 458 final TokenKind kind = type.getKind(); 459 if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) { 460 return true; 461 } 462 463 // only literals allowed are null, false and true 464 if (kind == TokenKind.LITERAL) { 465 switch (type) { 466 case FALSE: 467 case NULL: 468 case TRUE: 469 return true; 470 default: 471 return false; 472 } 473 } 474 475 // Fake out identifier. 476 final long identToken = Token.recast(token, IDENT); 477 // Get IDENT. 478 final String ident = (String)getValue(identToken); 479 return !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0)); 480 } 481 482 /** 483 * Create an IdentNode from the current token 484 * 485 * @return an IdentNode representing the current token 486 */ 487 protected final IdentNode getIdentifierName() { 488 if (type == IDENT) { 489 return getIdent(); 490 } else if (isIdentifierName()) { 491 // Fake out identifier. 492 final long identToken = Token.recast(token, IDENT); 493 // Get IDENT. 494 final String ident = (String)getValue(identToken); 495 next(); 496 // Create IDENT node. 497 return createIdentNode(identToken, finish, ident); 498 } else { 499 expect(IDENT); 500 return null; 501 } 502 } 503 504 /** 505 * Create a LiteralNode from the current token 506 * 507 * @return LiteralNode representing the current token 508 * @throws ParserException if any literals fails to parse 509 */ 510 protected final LiteralNode<?> getLiteral() throws ParserException { 511 // Capture LITERAL token. 512 final long literalToken = token; 513 514 // Create literal node. 515 final Object value = getValue(); 516 // Advance to have a correct finish 517 next(); 518 519 LiteralNode<?> node = null; 520 521 if (value == null) { 522 node = LiteralNode.newInstance(literalToken, finish); 523 } else if (value instanceof Number) { 524 node = LiteralNode.newInstance(literalToken, finish, (Number)value); 525 } else if (value instanceof String) { 526 node = LiteralNode.newInstance(literalToken, finish, (String)value); 527 } else if (value instanceof LexerToken) { 528 if (value instanceof RegexToken) { 529 final RegexToken regex = (RegexToken)value; 530 try { 531 RegExpFactory.validate(regex.getExpression(), regex.getOptions()); 532 } catch (final ParserException e) { 533 throw error(e.getMessage()); 534 } 535 } 536 node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value); 537 } else { 538 assert false : "unknown type for LiteralNode: " + value.getClass(); 539 } 540 541 return node; 542 } 543 }