1 /* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.nashorn.internal.parser; 27 28 import static jdk.nashorn.internal.parser.TokenType.COMMENT; 29 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT; 30 import static jdk.nashorn.internal.parser.TokenType.EOF; 31 import static jdk.nashorn.internal.parser.TokenType.EOL; 32 import static jdk.nashorn.internal.parser.TokenType.IDENT; 33 import java.util.HashMap; 34 import java.util.Map; 35 import jdk.nashorn.internal.ir.IdentNode; 36 import jdk.nashorn.internal.ir.LiteralNode; 37 import jdk.nashorn.internal.parser.Lexer.LexerToken; 38 import jdk.nashorn.internal.parser.Lexer.RegexToken; 39 import jdk.nashorn.internal.runtime.ECMAErrors; 40 import jdk.nashorn.internal.runtime.ErrorManager; 41 import jdk.nashorn.internal.runtime.JSErrorType; 42 import jdk.nashorn.internal.runtime.ParserException; 43 import jdk.nashorn.internal.runtime.Source; 44 import jdk.nashorn.internal.runtime.regexp.RegExpFactory; 45 46 /** 47 * Base class for parsers. 48 */ 49 public abstract class AbstractParser { 50 /** Source to parse. */ 51 protected final Source source; 52 53 /** Error manager to report errors. */ 54 protected final ErrorManager errors; 55 56 /** Stream of lex tokens to parse. */ 57 protected TokenStream stream; 58 59 /** Index of current token. */ 60 protected int k; 61 62 /** Previous token - accessible to sub classes */ 63 protected long previousToken; 64 65 /** Descriptor of current token. */ 66 protected long token; 67 68 /** Type of current token. */ 69 protected TokenType type; 70 71 /** Type of last token. */ 72 protected TokenType last; 73 74 /** Start position of current token. */ 75 protected int start; 76 77 /** Finish position of previous token. */ 78 protected int finish; 79 80 /** Current line number. */ 81 protected int line; 82 83 /** Position of last EOL + 1. */ 84 protected int linePosition; 85 86 /** Lexer used to scan source content. */ 87 protected Lexer lexer; 88 89 /** Is this parser running under strict mode? */ 90 protected boolean isStrictMode; 91 92 /** What should line numbers be counted from? */ 93 protected final int lineOffset; 94 95 private final Map<String, String> canonicalNames = new HashMap<>(); 96 97 /** 98 * Construct a parser. 99 * 100 * @param source Source to parse. 101 * @param errors Error reporting manager. 102 * @param strict True if we are in strict mode 103 * @param lineOffset Offset from which lines should be counted 104 */ 105 protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) { 106 if (source.getLength() > Token.LENGTH_MASK) { 107 throw new RuntimeException("Source exceeds size limit of " + Token.LENGTH_MASK + " bytes"); 108 } 109 this.source = source; 110 this.errors = errors; 111 this.k = -1; 112 this.token = Token.toDesc(EOL, 0, 1); 113 this.type = EOL; 114 this.last = EOL; 115 this.isStrictMode = strict; 116 this.lineOffset = lineOffset; 117 } 118 119 /** 120 * Get the ith token. 121 * 122 * @param i Index of token. 123 * 124 * @return the token 125 */ 126 protected final long getToken(final int i) { 127 // Make sure there are enough tokens available. 128 while (i > stream.last()) { 129 // If we need to buffer more for lookahead. 130 if (stream.isFull()) { 131 stream.grow(); 132 } 133 134 // Get more tokens. 135 lexer.lexify(); 136 } 137 138 return stream.get(i); 139 } 140 141 /** 142 * Return the tokenType of the ith token. 143 * 144 * @param i Index of token 145 * 146 * @return the token type 147 */ 148 protected final TokenType T(final int i) { 149 // Get token descriptor and extract tokenType. 150 return Token.descType(getToken(i)); 151 } 152 153 /** 154 * Seek next token that is not an EOL or comment. 155 * 156 * @return tokenType of next token. 157 */ 158 protected final TokenType next() { 159 do { 160 nextOrEOL(); 161 } while (type == EOL || type == COMMENT); 162 163 return type; 164 } 165 166 /** 167 * Seek next token or EOL (skipping comments.) 168 * 169 * @return tokenType of next token. 170 */ 171 protected final TokenType nextOrEOL() { 172 do { 173 nextToken(); 174 if (type == DIRECTIVE_COMMENT) { 175 checkDirectiveComment(); 176 } 177 } while (type == COMMENT || type == DIRECTIVE_COMMENT); 178 179 return type; 180 } 181 182 // sourceURL= after directive comment 183 private static final String SOURCE_URL_PREFIX = "sourceURL="; 184 185 // currently only @sourceURL=foo supported 186 private void checkDirectiveComment() { 187 // if already set, ignore this one 188 if (source.getExplicitURL() != null) { 189 return; 190 } 191 192 final String comment = (String) lexer.getValueOf(token, isStrictMode); 193 final int len = comment.length(); 194 // 4 characters for directive comment marker //@\s or //#\s 195 if (len > 4 && comment.substring(4).startsWith(SOURCE_URL_PREFIX)) { 196 source.setExplicitURL(comment.substring(4 + SOURCE_URL_PREFIX.length())); 197 } 198 } 199 200 /** 201 * Seek next token. 202 * 203 * @return tokenType of next token. 204 */ 205 private TokenType nextToken() { 206 // Capture last token type, but ignore comments (which are irrelevant for the purpose of newline detection). 207 if (type != COMMENT) { 208 last = type; 209 } 210 if (type != EOF) { 211 212 // Set up next token. 213 k++; 214 final long lastToken = token; 215 previousToken = token; 216 token = getToken(k); 217 type = Token.descType(token); 218 219 // do this before the start is changed below 220 if (last != EOL) { 221 finish = start + Token.descLength(lastToken); 222 } 223 224 if (type == EOL) { 225 line = Token.descLength(token); 226 linePosition = Token.descPosition(token); 227 } else { 228 start = Token.descPosition(token); 229 } 230 231 } 232 233 return type; 234 } 235 236 /** 237 * Get the message string for a message ID and arguments 238 * 239 * @param msgId The Message ID 240 * @param args The arguments 241 * 242 * @return The message string 243 */ 244 protected static String message(final String msgId, final String... args) { 245 return ECMAErrors.getMessage("parser.error." + msgId, args); 246 } 247 248 /** 249 * Report an error. 250 * 251 * @param message Error message. 252 * @param errorToken Offending token. 253 * @return ParserException upon failure. Caller should throw and not ignore 254 */ 255 protected final ParserException error(final String message, final long errorToken) { 256 return error(JSErrorType.SYNTAX_ERROR, message, errorToken); 257 } 258 259 /** 260 * Report an error. 261 * 262 * @param errorType The error type 263 * @param message Error message. 264 * @param errorToken Offending token. 265 * @return ParserException upon failure. Caller should throw and not ignore 266 */ 267 protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) { 268 final int position = Token.descPosition(errorToken); 269 final int lineNum = source.getLine(position); 270 final int columnNum = source.getColumn(position); 271 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken); 272 return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken); 273 } 274 275 /** 276 * Report an error. 277 * 278 * @param message Error message. 279 * @return ParserException upon failure. Caller should throw and not ignore 280 */ 281 protected final ParserException error(final String message) { 282 return error(JSErrorType.SYNTAX_ERROR, message); 283 } 284 285 /** 286 * Report an error. 287 * 288 * @param errorType The error type 289 * @param message Error message. 290 * @return ParserException upon failure. Caller should throw and not ignore 291 */ 292 protected final ParserException error(final JSErrorType errorType, final String message) { 293 // TODO - column needs to account for tabs. 294 final int position = Token.descPosition(token); 295 final int column = position - linePosition; 296 final String formatted = ErrorManager.format(message, source, line, column, token); 297 return new ParserException(errorType, formatted, source, line, column, token); 298 } 299 300 /** 301 * Report a warning to the error manager. 302 * 303 * @param errorType The error type of the warning 304 * @param message Warning message. 305 * @param errorToken error token 306 */ 307 protected final void warning(final JSErrorType errorType, final String message, final long errorToken) { 308 errors.warning(error(errorType, message, errorToken)); 309 } 310 311 /** 312 * Generate 'expected' message. 313 * 314 * @param expected Expected tokenType. 315 * 316 * @return the message string 317 */ 318 protected final String expectMessage(final TokenType expected) { 319 final String tokenString = Token.toString(source, token); 320 String msg; 321 322 if (expected == null) { 323 msg = AbstractParser.message("expected.stmt", tokenString); 324 } else { 325 final String expectedName = expected.getNameOrType(); 326 msg = AbstractParser.message("expected", expectedName, tokenString); 327 } 328 329 return msg; 330 } 331 332 /** 333 * Check current token and advance to the next token. 334 * 335 * @param expected Expected tokenType. 336 * 337 * @throws ParserException on unexpected token type 338 */ 339 protected final void expect(final TokenType expected) throws ParserException { 340 expectDontAdvance(expected); 341 next(); 342 } 343 344 /** 345 * Check current token, but don't advance to the next token. 346 * 347 * @param expected Expected tokenType. 348 * 349 * @throws ParserException on unexpected token type 350 */ 351 protected final void expectDontAdvance(final TokenType expected) throws ParserException { 352 if (type != expected) { 353 throw error(expectMessage(expected)); 354 } 355 } 356 357 /** 358 * Check next token, get its value and advance. 359 * 360 * @param expected Expected tokenType. 361 * @return The JavaScript value of the token 362 * @throws ParserException on unexpected token type 363 */ 364 protected final Object expectValue(final TokenType expected) throws ParserException { 365 if (type != expected) { 366 throw error(expectMessage(expected)); 367 } 368 369 final Object value = getValue(); 370 371 next(); 372 373 return value; 374 } 375 376 /** 377 * Get the value of the current token. 378 * 379 * @return JavaScript value of the token. 380 */ 381 protected final Object getValue() { 382 return getValue(token); 383 } 384 385 /** 386 * Get the value of a specific token 387 * 388 * @param valueToken the token 389 * 390 * @return JavaScript value of the token 391 */ 392 protected final Object getValue(final long valueToken) { 393 try { 394 return lexer.getValueOf(valueToken, isStrictMode); 395 } catch (final ParserException e) { 396 errors.error(e); 397 } 398 399 return null; 400 } 401 402 /** 403 * Certain future reserved words can be used as identifiers in 404 * non-strict mode. Check if the current token is one such. 405 * 406 * @return true if non strict mode identifier 407 */ 408 protected final boolean isNonStrictModeIdent() { 409 return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT; 410 } 411 412 /** 413 * Get ident. 414 * 415 * @return Ident node. 416 */ 417 protected final IdentNode getIdent() { 418 // Capture IDENT token. 419 long identToken = token; 420 421 if (isNonStrictModeIdent()) { 422 // Fake out identifier. 423 identToken = Token.recast(token, IDENT); 424 // Get IDENT. 425 final String ident = (String)getValue(identToken); 426 427 next(); 428 429 // Create IDENT node. 430 return createIdentNode(identToken, finish, ident).setIsFutureStrictName(); 431 } 432 433 // Get IDENT. 434 final String ident = (String)expectValue(IDENT); 435 if (ident == null) { 436 return null; 437 } 438 // Create IDENT node. 439 return createIdentNode(identToken, finish, ident); 440 } 441 442 /** 443 * Creates a new {@link IdentNode} as if invoked with a {@link IdentNode#IdentNode(long, int, String) 444 * constructor} but making sure that the {@code name} is deduplicated within this parse job. 445 * @param identToken the token for the new {@code IdentNode} 446 * @param identFinish the finish for the new {@code IdentNode} 447 * @param name the name for the new {@code IdentNode}. It will be de-duplicated. 448 * @return a newly constructed {@code IdentNode} with the specified token, finish, and name; the name will 449 * be deduplicated. 450 */ 451 protected IdentNode createIdentNode(final long identToken, final int identFinish, final String name) { 452 final String existingName = canonicalNames.putIfAbsent(name, name); 453 final String canonicalName = existingName != null ? existingName : name; 454 return new IdentNode(identToken, identFinish, canonicalName); 455 } 456 457 /** 458 * Check if current token is in identifier name 459 * 460 * @return true if current token is an identifier name 461 */ 462 protected final boolean isIdentifierName() { 463 final TokenKind kind = type.getKind(); 464 if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) { 465 return true; 466 } 467 468 // only literals allowed are null, false and true 469 if (kind == TokenKind.LITERAL) { 470 switch (type) { 471 case FALSE: 472 case NULL: 473 case TRUE: 474 return true; 475 default: 476 return false; 477 } 478 } 479 480 // Fake out identifier. 481 final long identToken = Token.recast(token, IDENT); 482 // Get IDENT. 483 final String ident = (String)getValue(identToken); 484 return !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0)); 485 } 486 487 /** 488 * Create an IdentNode from the current token 489 * 490 * @return an IdentNode representing the current token 491 */ 492 protected final IdentNode getIdentifierName() { 493 if (type == IDENT) { 494 return getIdent(); 495 } else if (isIdentifierName()) { 496 // Fake out identifier. 497 final long identToken = Token.recast(token, IDENT); 498 // Get IDENT. 499 final String ident = (String)getValue(identToken); 500 next(); 501 // Create IDENT node. 502 return createIdentNode(identToken, finish, ident); 503 } else { 504 expect(IDENT); 505 return null; 506 } 507 } 508 509 /** 510 * Create a LiteralNode from the current token 511 * 512 * @return LiteralNode representing the current token 513 * @throws ParserException if any literals fails to parse 514 */ 515 protected final LiteralNode<?> getLiteral() throws ParserException { 516 // Capture LITERAL token. 517 final long literalToken = token; 518 519 // Create literal node. 520 final Object value = getValue(); 521 // Advance to have a correct finish 522 next(); 523 524 LiteralNode<?> node = null; 525 526 if (value == null) { 527 node = LiteralNode.newInstance(literalToken, finish); 528 } else if (value instanceof Number) { 529 node = LiteralNode.newInstance(literalToken, finish, (Number)value); 530 } else if (value instanceof String) { 531 node = LiteralNode.newInstance(literalToken, finish, (String)value); 532 } else if (value instanceof LexerToken) { 533 if (value instanceof RegexToken) { 534 final RegexToken regex = (RegexToken)value; 535 try { 536 RegExpFactory.validate(regex.getExpression(), regex.getOptions()); 537 } catch (final ParserException e) { 538 throw error(e.getMessage()); 539 } 540 } 541 node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value); 542 } else { 543 assert false : "unknown type for LiteralNode: " + value.getClass(); 544 } 545 546 return node; 547 } 548 }