1 /*
2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package jdk.nashorn.internal.parser;
27
28 import static jdk.nashorn.internal.parser.TokenType.ADD;
29 import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER;
30 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
31 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
32 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
33 import static jdk.nashorn.internal.parser.TokenType.EOF;
34 import static jdk.nashorn.internal.parser.TokenType.EOL;
35 import static jdk.nashorn.internal.parser.TokenType.ERROR;
36 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
37 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
38 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
39 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
40 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
41 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
42 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
43 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
44 import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
45 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
46 import static jdk.nashorn.internal.parser.TokenType.REGEX;
47 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
48 import static jdk.nashorn.internal.parser.TokenType.STRING;
49 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
50 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
51 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
52 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
53 import static jdk.nashorn.internal.parser.TokenType.XML;
54
55 import java.io.Serializable;
56
57 import jdk.nashorn.internal.runtime.ECMAErrors;
58 import jdk.nashorn.internal.runtime.ErrorManager;
59 import jdk.nashorn.internal.runtime.JSErrorType;
60 import jdk.nashorn.internal.runtime.JSType;
61 import jdk.nashorn.internal.runtime.ParserException;
62 import jdk.nashorn.internal.runtime.Source;
63 import jdk.nashorn.internal.runtime.options.Options;
64
65 /**
66 * Responsible for converting source content into a stream of tokens.
67 *
68 */
69 @SuppressWarnings("fallthrough")
70 public class Lexer extends Scanner {
71 private static final long MIN_INT_L = Integer.MIN_VALUE;
72 private static final long MAX_INT_L = Integer.MAX_VALUE;
73
74 private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");
75
76 /** Content source. */
77 private final Source source;
78
79 /** Buffered stream for tokens. */
80 private final TokenStream stream;
81
82 /** True if here and edit strings are supported. */
83 private final boolean scripting;
84
85 /** True if parsing in ECMAScript 6 mode. */
86 private final boolean es6;
87
88 /** True if a nested scan. (scan to completion, no EOF.) */
89 private final boolean nested;
90
91 /** Pending new line number and position. */
92 int pendingLine;
93
94 /** Position of last EOL + 1. */
95 private int linePosition;
96
97 /** Type of last token added. */
98 private TokenType last;
99
100 private final boolean pauseOnFunctionBody;
101 private boolean pauseOnNextLeftBrace;
102
103 private int templateExpressionOpenBraces;
104
105 private static final String SPACETAB = " \t"; // ASCII space and tab
106 private static final String LFCR = "\n\r"; // line feed and carriage return (ctrl-m)
107
108 private static final String JAVASCRIPT_WHITESPACE_EOL =
109 LFCR +
110 "\u2028" + // line separator
111 "\u2029" // paragraph separator
112 ;
113 private static final String JAVASCRIPT_WHITESPACE =
114 SPACETAB +
115 JAVASCRIPT_WHITESPACE_EOL +
116 "\u000b" + // tabulation line
117 "\u000c" + // ff (ctrl-l)
118 "\u00a0" + // Latin-1 space
119 "\u1680" + // Ogham space mark
120 "\u180e" + // separator, Mongolian vowel
121 "\u2000" + // en quad
122 "\u2001" + // em quad
123 "\u2002" + // en space
124 "\u2003" + // em space
125 "\u2004" + // three-per-em space
126 "\u2005" + // four-per-em space
127 "\u2006" + // six-per-em space
128 "\u2007" + // figure space
129 "\u2008" + // punctuation space
130 "\u2009" + // thin space
131 "\u200a" + // hair space
132 "\u202f" + // narrow no-break space
133 "\u205f" + // medium mathematical space
134 "\u3000" + // ideographic space
135 "\ufeff" // byte order mark
136 ;
137
138 private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
139 "\\u000a" + // line feed
140 "\\u000d" + // carriage return (ctrl-m)
141 "\\u2028" + // line separator
142 "\\u2029" + // paragraph separator
143 "\\u0009" + // tab
144 "\\u0020" + // ASCII space
145 "\\u000b" + // tabulation line
146 "\\u000c" + // ff (ctrl-l)
147 "\\u00a0" + // Latin-1 space
148 "\\u1680" + // Ogham space mark
149 "\\u180e" + // separator, Mongolian vowel
150 "\\u2000" + // en quad
151 "\\u2001" + // em quad
152 "\\u2002" + // en space
153 "\\u2003" + // em space
154 "\\u2004" + // three-per-em space
155 "\\u2005" + // four-per-em space
156 "\\u2006" + // six-per-em space
157 "\\u2007" + // figure space
158 "\\u2008" + // punctuation space
159 "\\u2009" + // thin space
160 "\\u200a" + // hair space
161 "\\u202f" + // narrow no-break space
162 "\\u205f" + // medium mathematical space
163 "\\u3000" + // ideographic space
164 "\\ufeff" // byte order mark
165 ;
166
167 static String unicodeEscape(final char ch) {
168 final StringBuilder sb = new StringBuilder();
169
170 sb.append("\\u");
171
172 final String hex = Integer.toHexString(ch);
173 for (int i = hex.length(); i < 4; i++) {
174 sb.append('0');
175 }
176 sb.append(hex);
177
178 return sb.toString();
179 }
180
/**
 * Creates a lexer over the whole of {@code source} with scripting mode,
 * ECMAScript 6 mode and pausing on function bodies all switched off.
 *
 * @param source    the source
 * @param stream    the token stream to lex
 */
public Lexer(final Source source, final TokenStream stream) {
    this(source, 0, source.getLength(), stream, false, false, false);
}
190
/**
 * Creates a lexer over the whole of {@code source}, from position 0 for
 * {@code source.getLength()} characters, without pausing on function bodies.
 *
 * @param source    the source
 * @param stream    the token stream to lex
 * @param scripting true to lex in scripting mode
 * @param es6       true to lex in ECMAScript 6 mode
 */
public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) {
    this(source, 0, source.getLength(), stream, scripting, es6, false);
}
202
/**
 * Creates a lexer over a segment of {@code source}. Scanning starts at line 1
 * with an EOL pending on that line and {@code EOL} recorded as the last token type.
 *
 * @param source    the source
 * @param start     start position in source from which to start lexing
 * @param len       length of source segment to lex
 * @param stream    token stream to lex
 * @param scripting are we in scripting mode
 * @param es6       are we in ECMAScript 6 mode
 * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
 * function body. This is used with the feature where the parser is skipping nested function bodies to
 * avoid reading ahead unnecessarily when we skip the function bodies.
 */
public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) {
    super(source.getContent(), 1, start, len);

    // Collaborators.
    this.source = source;
    this.stream = stream;

    // Mode switches.
    this.scripting           = scripting;
    this.es6                 = es6;
    this.pauseOnFunctionBody = pauseOnFunctionBody;
    this.nested              = false;

    // Initial scan state.
    this.last        = EOL;
    this.pendingLine = 1;
}
228
/**
 * Creates a nested lexer that shares the parent's source, stream and language
 * modes and resumes from a captured state. The nested lexer never pauses on
 * function bodies and starts with {@code EOL} as its last token type.
 *
 * @param lexer parent lexer
 * @param state captured state to resume from
 */
private Lexer(final Lexer lexer, final State state) {
    super(lexer, state);

    // Shared with the parent.
    this.source    = lexer.source;
    this.stream    = lexer.stream;
    this.scripting = lexer.scripting;
    this.es6       = lexer.es6;

    // Specific to a nested scan.
    this.nested              = true;
    this.pauseOnFunctionBody = false;
    this.last                = EOL;

    // Line bookkeeping comes from the captured state.
    this.pendingLine  = state.pendingLine;
    this.linePosition = state.linePosition;
}
243
244 static class State extends Scanner.State {
245 /** Pending new line number and position. */
246 public final int pendingLine;
247
248 /** Position of last EOL + 1. */
249 public final int linePosition;
250
251 /** Type of last token added. */
252 public final TokenType last;
253
254 /*
255 * Constructor.
256 */
257
258 State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
259 super(position, limit, line);
260
261 this.pendingLine = pendingLine;
262 this.linePosition = linePosition;
263 this.last = last;
264 }
265 }
266
267 /**
268 * Save the state of the scan.
269 *
270 * @return Captured state.
271 */
272 @Override
273 State saveState() {
274 return new State(position, limit, line, pendingLine, linePosition, last);
275 }
276
277 /**
278 * Restore the state of the scan.
279 *
280 * @param state
281 * Captured state.
282 */
283 void restoreState(final State state) {
284 super.restoreState(state);
285
286 pendingLine = state.pendingLine;
287 linePosition = state.linePosition;
288 last = state.last;
289 }
290
291 /**
292 * Add a new token to the stream.
293 *
294 * @param type
295 * Token type.
296 * @param start
297 * Start position.
298 * @param end
299 * End position.
300 */
301 protected void add(final TokenType type, final int start, final int end) {
302 // Record last token.
303 last = type;
304
305 // Only emit the last EOL in a cluster.
306 if (type == EOL) {
307 pendingLine = end;
308 linePosition = start;
309 } else {
310 // Write any pending EOL to stream.
311 if (pendingLine != -1) {
312 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
313 pendingLine = -1;
314 }
315
316 // Write token to stream.
317 stream.put(Token.toDesc(type, start, end - start));
318 }
319 }
320
321 /**
322 * Add a new token to the stream.
323 *
324 * @param type
325 * Token type.
326 * @param start
327 * Start position.
328 */
329 protected void add(final TokenType type, final int start) {
330 add(type, start, position);
331 }
332
333 /**
334 * Return the String of valid whitespace characters for regular
335 * expressions in JavaScript
336 * @return regexp whitespace string
337 */
338 public static String getWhitespaceRegExp() {
339 return JAVASCRIPT_WHITESPACE_IN_REGEXP;
340 }
341
342 /**
343 * Skip end of line.
344 *
345 * @param addEOL true if EOL token should be recorded.
346 */
347 private void skipEOL(final boolean addEOL) {
348
349 if (ch0 == '\r') { // detect \r\n pattern
350 skip(1);
351 if (ch0 == '\n') {
352 skip(1);
353 }
354 } else { // all other space, ch0 is guaranteed to be EOL or \0
355 skip(1);
356 }
357
358 // bump up line count
359 line++;
360
361 if (addEOL) {
362 // Add an EOL token.
363 add(EOL, position, line);
364 }
365 }
366
367 /**
368 * Skip over rest of line including end of line.
369 *
370 * @param addEOL true if EOL token should be recorded.
371 */
372 private void skipLine(final boolean addEOL) {
373 // Ignore characters.
374 while (!isEOL(ch0) && !atEOF()) {
375 skip(1);
376 }
377 // Skip over end of line.
378 skipEOL(addEOL);
379 }
380
381 /**
382 * Test whether a char is valid JavaScript whitespace
383 * @param ch a char
384 * @return true if valid JavaScript whitespace
385 */
386 public static boolean isJSWhitespace(final char ch) {
387 return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
388 }
389
390 /**
391 * Test whether a char is valid JavaScript end of line
392 * @param ch a char
393 * @return true if valid JavaScript end of line
394 */
395 public static boolean isJSEOL(final char ch) {
396 return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
397 }
398
399 /**
400 * Test if char is a string delimiter, e.g. '\' or '"'.
401 * @param ch a char
402 * @return true if string delimiter
403 */
404 protected boolean isStringDelimiter(final char ch) {
405 return ch == '\'' || ch == '"';
406 }
407
408 /**
409 * Test if char is a template literal delimiter ('`').
410 */
411 private static boolean isTemplateDelimiter(char ch) {
412 return ch == '`';
413 }
414
415 /**
416 * Test whether a char is valid JavaScript whitespace
417 * @param ch a char
418 * @return true if valid JavaScript whitespace
419 */
420 protected boolean isWhitespace(final char ch) {
421 return Lexer.isJSWhitespace(ch);
422 }
423
424 /**
425 * Test whether a char is valid JavaScript end of line
426 * @param ch a char
427 * @return true if valid JavaScript end of line
428 */
429 protected boolean isEOL(final char ch) {
430 return Lexer.isJSEOL(ch);
431 }
432
433 /**
434 * Skip over whitespace and detect end of line, adding EOL tokens if
435 * encountered.
436 *
437 * @param addEOL true if EOL tokens should be recorded.
438 */
439 private void skipWhitespace(final boolean addEOL) {
440 while (isWhitespace(ch0)) {
441 if (isEOL(ch0)) {
442 skipEOL(addEOL);
443 } else {
444 skip(1);
445 }
446 }
447 }
448
449 /**
450 * Skip over comments.
451 *
452 * @return True if a comment.
453 */
454 protected boolean skipComments() {
455 // Save the current position.
456 final int start = position;
457
458 if (ch0 == '/') {
459 // Is it a // comment.
460 if (ch1 == '/') {
461 // Skip over //.
462 skip(2);
463
464 boolean directiveComment = false;
465 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
466 directiveComment = true;
467 }
468
469 // Scan for EOL.
470 while (!atEOF() && !isEOL(ch0)) {
471 skip(1);
472 }
473 // Did detect a comment.
474 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
475 return true;
476 } else if (ch1 == '*') {
477 // Skip over /*.
478 skip(2);
479 // Scan for */.
480 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
481 // If end of line handle else skip character.
482 if (isEOL(ch0)) {
483 skipEOL(true);
484 } else {
485 skip(1);
486 }
487 }
488
489 if (atEOF()) {
490 // TODO - Report closing */ missing in parser.
491 add(ERROR, start);
492 } else {
493 // Skip */.
494 skip(2);
495 }
496
497 // Did detect a comment.
498 add(COMMENT, start);
499 return true;
500 }
501 } else if (ch0 == '#') {
502 assert scripting;
503 // shell style comment
504 // Skip over #.
505 skip(1);
506 // Scan for EOL.
507 while (!atEOF() && !isEOL(ch0)) {
508 skip(1);
509 }
510 // Did detect a comment.
511 add(COMMENT, start);
512 return true;
513 }
514
515 // Not a comment.
516 return false;
517 }
518
519 /**
520 * Convert a regex token to a token object.
521 *
522 * @param start Position in source content.
523 * @param length Length of regex token.
524 * @return Regex token object.
525 */
526 public RegexToken valueOfPattern(final int start, final int length) {
527 // Save the current position.
528 final int savePosition = position;
529 // Reset to beginning of content.
530 reset(start);
531 // Buffer for recording characters.
532 final StringBuilder sb = new StringBuilder(length);
533
534 // Skip /.
535 skip(1);
536 boolean inBrackets = false;
537 // Scan for closing /, stopping at end of line.
538 while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
539 // Skip over escaped character.
540 if (ch0 == '\\') {
541 sb.append(ch0);
542 sb.append(ch1);
543 skip(2);
544 } else {
545 if (ch0 == '[') {
546 inBrackets = true;
547 } else if (ch0 == ']') {
548 inBrackets = false;
549 }
550
551 // Skip literal character.
552 sb.append(ch0);
553 skip(1);
554 }
555 }
556
557 // Get pattern as string.
558 final String regex = sb.toString();
559
560 // Skip /.
561 skip(1);
562
563 // Options as string.
564 final String options = source.getString(position, scanIdentifier());
565
566 reset(savePosition);
567
568 // Compile the pattern.
569 return new RegexToken(regex, options);
570 }
571
572 /**
573 * Return true if the given token can be the beginning of a literal.
574 *
575 * @param token a token
576 * @return true if token can start a literal.
577 */
578 public boolean canStartLiteral(final TokenType token) {
579 return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
580 }
581
582 /**
583 * interface to receive line information for multi-line literals.
584 */
585 protected interface LineInfoReceiver {
586 /**
587 * Receives line information
588 * @param line last line number
589 * @param linePosition position of last line
590 */
591 public void lineInfo(int line, int linePosition);
592 }
593
594 /**
595 * Check whether the given token represents the beginning of a literal. If so scan
596 * the literal and return <tt>true</tt>, otherwise return false.
597 *
598 * @param token the token.
599 * @param startTokenType the token type.
600 * @param lir LineInfoReceiver that receives line info for multi-line string literals.
601 * @return True if a literal beginning with startToken was found and scanned.
602 */
603 protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
604 // Check if it can be a literal.
605 if (!canStartLiteral(startTokenType)) {
606 return false;
607 }
608 // We break on ambiguous tokens so if we already moved on it can't be a literal.
609 if (stream.get(stream.last()) != token) {
610 return false;
611 }
612 // Rewind to token start position
613 reset(Token.descPosition(token));
614
615 if (ch0 == '/') {
616 return scanRegEx();
617 } else if (ch0 == '<') {
618 if (ch1 == '<') {
619 return scanHereString(lir);
620 } else if (Character.isJavaIdentifierStart(ch1)) {
621 return scanXMLLiteral();
622 }
623 }
624
625 return false;
626 }
627
628 /**
629 * Scan over regex literal.
630 *
631 * @return True if a regex literal.
632 */
633 private boolean scanRegEx() {
634 assert ch0 == '/';
635 // Make sure it's not a comment.
636 if (ch1 != '/' && ch1 != '*') {
637 // Record beginning of literal.
638 final int start = position;
639 // Skip /.
640 skip(1);
641 boolean inBrackets = false;
642
643 // Scan for closing /, stopping at end of line.
644 while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
645 // Skip over escaped character.
646 if (ch0 == '\\') {
647 skip(1);
648 if (isEOL(ch0)) {
649 reset(start);
650 return false;
651 }
652 skip(1);
653 } else {
654 if (ch0 == '[') {
655 inBrackets = true;
656 } else if (ch0 == ']') {
657 inBrackets = false;
658 }
659
660 // Skip literal character.
661 skip(1);
662 }
663 }
664
665 // If regex literal.
666 if (ch0 == '/') {
667 // Skip /.
668 skip(1);
669
670 // Skip over options.
671 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
672 skip(1);
673 }
674
675 // Add regex token.
676 add(REGEX, start);
677 // Regex literal detected.
678 return true;
679 }
680
681 // False start try again.
682 reset(start);
683 }
684
685 // Regex literal not detected.
686 return false;
687 }
688
689 /**
690 * Convert a digit to a integer. Can't use Character.digit since we are
691 * restricted to ASCII by the spec.
692 *
693 * @param ch Character to convert.
694 * @param base Numeric base.
695 *
696 * @return The converted digit or -1 if invalid.
697 */
698 protected static int convertDigit(final char ch, final int base) {
699 int digit;
700
701 if ('0' <= ch && ch <= '9') {
702 digit = ch - '0';
703 } else if ('A' <= ch && ch <= 'Z') {
704 digit = ch - 'A' + 10;
705 } else if ('a' <= ch && ch <= 'z') {
706 digit = ch - 'a' + 10;
707 } else {
708 return -1;
709 }
710
711 return digit < base ? digit : -1;
712 }
713
714
715 /**
716 * Get the value of a hexadecimal numeric sequence.
717 *
718 * @param length Number of digits.
719 * @param type Type of token to report against.
720 * @return Value of sequence or < 0 if no digits.
721 */
722 private int hexSequence(final int length, final TokenType type) {
723 int value = 0;
724
725 for (int i = 0; i < length; i++) {
726 final int digit = convertDigit(ch0, 16);
727
728 if (digit == -1) {
729 error(Lexer.message("invalid.hex"), type, position, limit);
730 return i == 0 ? -1 : value;
731 }
732
733 value = digit | value << 4;
734 skip(1);
735 }
736
737 return value;
738 }
739
740 /**
741 * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
742 *
743 * @return Value of sequence.
744 */
745 private int octalSequence() {
746 int value = 0;
747
748 for (int i = 0; i < 3; i++) {
749 final int digit = convertDigit(ch0, 8);
750
751 if (digit == -1) {
752 break;
753 }
754 value = digit | value << 3;
755 skip(1);
756
757 if (i == 1 && value >= 32) {
758 break;
759 }
760 }
761 return value;
762 }
763
764 /**
765 * Convert a string to a JavaScript identifier.
766 *
767 * @param start Position in source content.
768 * @param length Length of token.
769 * @return Ident string or null if an error.
770 */
771 private String valueOfIdent(final int start, final int length) throws RuntimeException {
772 // Save the current position.
773 final int savePosition = position;
774 // End of scan.
775 final int end = start + length;
776 // Reset to beginning of content.
777 reset(start);
778 // Buffer for recording characters.
779 final StringBuilder sb = new StringBuilder(length);
780
781 // Scan until end of line or end of file.
782 while (!atEOF() && position < end && !isEOL(ch0)) {
783 // If escape character.
784 if (ch0 == '\\' && ch1 == 'u') {
785 skip(2);
786 final int ch = hexSequence(4, TokenType.IDENT);
787 if (isWhitespace((char)ch)) {
788 return null;
789 }
790 if (ch < 0) {
791 sb.append('\\');
792 sb.append('u');
793 } else {
794 sb.append((char)ch);
795 }
796 } else {
797 // Add regular character.
798 sb.append(ch0);
799 skip(1);
800 }
801 }
802
803 // Restore position.
804 reset(savePosition);
805
806 return sb.toString();
807 }
808
809 /**
810 * Scan over and identifier or keyword. Handles identifiers containing
811 * encoded Unicode chars.
812 *
813 * Example:
814 *
815 * var \u0042 = 44;
816 */
817 private void scanIdentifierOrKeyword() {
818 // Record beginning of identifier.
819 final int start = position;
820 // Scan identifier.
821 final int length = scanIdentifier();
822 // Check to see if it is a keyword.
823 final TokenType type = TokenLookup.lookupKeyword(content, start, length);
824 if (type == FUNCTION && pauseOnFunctionBody) {
825 pauseOnNextLeftBrace = true;
826 }
827 // Add keyword or identifier token.
828 add(type, start);
829 }
830
831 /**
832 * Convert a string to a JavaScript string object.
833 *
834 * @param start Position in source content.
835 * @param length Length of token.
836 * @return JavaScript string object.
837 */
838 private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
839 // Save the current position.
840 final int savePosition = position;
841 // Calculate the end position.
842 final int end = start + length;
843 // Reset to beginning of string.
844 reset(start);
845
846 // Buffer for recording characters.
847 final StringBuilder sb = new StringBuilder(length);
848
849 // Scan until end of string.
850 while (position < end) {
851 // If escape character.
852 if (ch0 == '\\') {
853 skip(1);
854
855 final char next = ch0;
856 final int afterSlash = position;
857
858 skip(1);
859
860 // Special characters.
861 switch (next) {
862 case '0':
863 case '1':
864 case '2':
865 case '3':
866 case '4':
867 case '5':
868 case '6':
869 case '7': {
870 if (strict) {
871 // "\0" itself is allowed in strict mode. Only other 'real'
872 // octal escape sequences are not allowed (eg. "\02", "\31").
873 // See section 7.8.4 String literals production EscapeSequence
874 if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
875 error(Lexer.message("strict.no.octal"), STRING, position, limit);
876 }
877 }
878 reset(afterSlash);
879 // Octal sequence.
880 final int ch = octalSequence();
881
882 if (ch < 0) {
883 sb.append('\\');
884 sb.append('x');
885 } else {
886 sb.append((char)ch);
887 }
888 break;
889 }
890 case 'n':
891 sb.append('\n');
892 break;
893 case 't':
894 sb.append('\t');
895 break;
896 case 'b':
897 sb.append('\b');
898 break;
899 case 'f':
900 sb.append('\f');
901 break;
902 case 'r':
903 sb.append('\r');
904 break;
905 case '\'':
906 sb.append('\'');
907 break;
908 case '\"':
909 sb.append('\"');
910 break;
911 case '\\':
912 sb.append('\\');
913 break;
914 case '\r': // CR | CRLF
915 if (ch0 == '\n') {
916 skip(1);
917 }
918 // fall through
919 case '\n': // LF
920 case '\u2028': // LS
921 case '\u2029': // PS
922 // continue on the next line, slash-return continues string
923 // literal
924 break;
925 case 'x': {
926 // Hex sequence.
927 final int ch = hexSequence(2, STRING);
928
929 if (ch < 0) {
930 sb.append('\\');
931 sb.append('x');
932 } else {
933 sb.append((char)ch);
934 }
935 }
936 break;
937 case 'u': {
938 // Unicode sequence.
939 final int ch = hexSequence(4, STRING);
940
941 if (ch < 0) {
942 sb.append('\\');
943 sb.append('u');
944 } else {
945 sb.append((char)ch);
946 }
947 }
948 break;
949 case 'v':
950 sb.append('\u000B');
951 break;
952 // All other characters.
953 default:
954 sb.append(next);
955 break;
956 }
957 } else if (ch0 == '\r') {
958 // Convert CR-LF or CR to LF line terminator.
959 sb.append('\n');
960 skip(ch1 == '\n' ? 2 : 1);
961 } else {
962 // Add regular character.
963 sb.append(ch0);
964 skip(1);
965 }
966 }
967
968 // Restore position.
969 reset(savePosition);
970
971 return sb.toString();
972 }
973
974 /**
975 * Scan over a string literal.
976 * @param add true if we are not just scanning but should actually modify the token stream
977 */
978 protected void scanString(final boolean add) {
979 // Type of string.
980 TokenType type = STRING;
981 // Record starting quote.
982 final char quote = ch0;
983 // Skip over quote.
984 skip(1);
985
986 // Record beginning of string content.
987 final State stringState = saveState();
988
989 // Scan until close quote or end of line.
990 while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
991 // Skip over escaped character.
992 if (ch0 == '\\') {
993 type = ESCSTRING;
994 skip(1);
995 if (! isEscapeCharacter(ch0)) {
996 error(Lexer.message("invalid.escape.char"), STRING, position, limit);
997 }
998 if (isEOL(ch0)) {
999 // Multiline string literal
1000 skipEOL(false);
1001 continue;
1002 }
1003 }
1004 // Skip literal character.
1005 skip(1);
1006 }
1007
1008 // If close quote.
1009 if (ch0 == quote) {
1010 // Skip close quote.
1011 skip(1);
1012 } else {
1013 error(Lexer.message("missing.close.quote"), STRING, position, limit);
1014 }
1015
1016 // If not just scanning.
1017 if (add) {
1018 // Record end of string.
1019 stringState.setLimit(position - 1);
1020
1021 if (scripting && !stringState.isEmpty()) {
1022 switch (quote) {
1023 case '`':
1024 // Mark the beginning of an exec string.
1025 add(EXECSTRING, stringState.position, stringState.limit);
1026 // Frame edit string with left brace.
1027 add(LBRACE, stringState.position, stringState.position);
1028 // Process edit string.
1029 editString(type, stringState);
1030 // Frame edit string with right brace.
1031 add(RBRACE, stringState.limit, stringState.limit);
1032 break;
1033 case '"':
1034 // Only edit double quoted strings.
1035 editString(type, stringState);
1036 break;
1037 case '\'':
1038 // Add string token without editing.
1039 add(type, stringState.position, stringState.limit);
1040 break;
1041 default:
1042 break;
1043 }
1044 } else {
1045 /// Add string token without editing.
1046 add(type, stringState.position, stringState.limit);
1047 }
1048 }
1049 }
1050
1051 /**
1052 * Scan over a template string literal.
1053 */
1054 private void scanTemplate() {
1055 assert ch0 == '`';
1056 TokenType type = TEMPLATE;
1057
1058 // Skip over quote and record beginning of string content.
1059 skip(1);
1060 State stringState = saveState();
1061
1062 // Scan until close quote
1063 while (!atEOF()) {
1064 // Skip over escaped character.
1065 if (ch0 == '`') {
1066 skip(1);
1067 // Record end of string.
1068 stringState.setLimit(position - 1);
1069 add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit);
1070 return;
1071 } else if (ch0 == '$' && ch1 == '{') {
1072 skip(2);
1073 stringState.setLimit(position - 2);
1074 add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit);
1075
1076 // scan to RBRACE
1077 Lexer expressionLexer = new Lexer(this, saveState());
1078 expressionLexer.templateExpressionOpenBraces = 1;
1079 expressionLexer.lexify();
1080 restoreState(expressionLexer.saveState());
1081
1082 // scan next middle or tail of the template literal
1083 assert ch0 == '}';
1084 type = TEMPLATE_MIDDLE;
1085
1086 // Skip over rbrace and record beginning of string content.
1087 skip(1);
1088 stringState = saveState();
1089
1090 continue;
1091 } else if (ch0 == '\\') {
1092 skip(1);
1093 // EscapeSequence
1094 if (!isEscapeCharacter(ch0)) {
1095 error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit);
1096 }
1097 if (isEOL(ch0)) {
1098 // LineContinuation
1099 skipEOL(false);
1100 continue;
1101 }
1102 } else if (isEOL(ch0)) {
1103 // LineTerminatorSequence
1104 skipEOL(false);
1105 continue;
1106 }
1107
1108 // Skip literal character.
1109 skip(1);
1110 }
1111
1112 error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
1113 }
1114
1115 /**
1116 * Is the given character a valid escape char after "\" ?
1117 *
1118 * @param ch character to be checked
1119 * @return if the given character is valid after "\"
1120 */
1121 protected boolean isEscapeCharacter(final char ch) {
1122 return true;
1123 }
1124
1125 /**
1126 * Convert string to number.
1127 *
1128 * @param valueString String to convert.
1129 * @param radix Numeric base.
1130 * @return Converted number.
1131 */
1132 private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1133 try {
1134 return Integer.parseInt(valueString, radix);
1135 } catch (final NumberFormatException e) {
1136 if (radix == 10) {
1137 return Double.valueOf(valueString);
1138 }
1139
1140 double value = 0.0;
1141
1142 for (int i = 0; i < valueString.length(); i++) {
1143 final char ch = valueString.charAt(i);
1144 // Preverified, should always be a valid digit.
1145 final int digit = convertDigit(ch, radix);
1146 value *= radix;
1147 value += digit;
1148 }
1149
1150 return value;
1151 }
1152 }
1153
1154 /**
1155 * Scan a number.
1156 */
1157 protected void scanNumber() {
1158 // Record beginning of number.
1159 final int start = position;
1160 // Assume value is a decimal.
1161 TokenType type = DECIMAL;
1162
1163 // First digit of number.
1164 int digit = convertDigit(ch0, 10);
1165
1166 // If number begins with 0x.
1167 if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1168 // Skip over 0xN.
1169 skip(3);
1170 // Skip over remaining digits.
1171 while (convertDigit(ch0, 16) != -1) {
1172 skip(1);
1173 }
1174
1175 type = HEXADECIMAL;
1176 } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) {
1177 // Skip over 0oN.
1178 skip(3);
1179 // Skip over remaining digits.
1180 while (convertDigit(ch0, 8) != -1) {
1181 skip(1);
1182 }
1183
1184 type = OCTAL;
1185 } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) {
1186 // Skip over 0bN.
1187 skip(3);
1188 // Skip over remaining digits.
1189 while (convertDigit(ch0, 2) != -1) {
1190 skip(1);
1191 }
1192
1193 type = BINARY_NUMBER;
1194 } else {
1195 // Check for possible octal constant.
1196 boolean octal = digit == 0;
1197 // Skip first digit if not leading '.'.
1198 if (digit != -1) {
1199 skip(1);
1200 }
1201
1202 // Skip remaining digits.
1203 while ((digit = convertDigit(ch0, 10)) != -1) {
1204 // Check octal only digits.
1205 octal = octal && digit < 8;
1206 // Skip digit.
1207 skip(1);
1208 }
1209
1210 if (octal && position - start > 1) {
1211 type = OCTAL_LEGACY;
1212 } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1213 // Must be a double.
1214 if (ch0 == '.') {
1215 // Skip period.
1216 skip(1);
1217 // Skip mantissa.
1218 while (convertDigit(ch0, 10) != -1) {
1219 skip(1);
1220 }
1221 }
1222
1223 // Detect exponent.
1224 if (ch0 == 'E' || ch0 == 'e') {
1225 // Skip E.
1226 skip(1);
1227 // Detect and skip exponent sign.
1228 if (ch0 == '+' || ch0 == '-') {
1229 skip(1);
1230 }
1231 // Skip exponent.
1232 while (convertDigit(ch0, 10) != -1) {
1233 skip(1);
1234 }
1235 }
1236
1237 type = FLOATING;
1238 }
1239 }
1240
1241 if (Character.isJavaIdentifierStart(ch0)) {
1242 error(Lexer.message("missing.space.after.number"), type, position, 1);
1243 }
1244
1245 // Add number token.
1246 add(type, start);
1247 }
1248
1249 /**
1250 * Convert a regex token to a token object.
1251 *
1252 * @param start Position in source content.
1253 * @param length Length of regex token.
1254 * @return Regex token object.
1255 */
1256 XMLToken valueOfXML(final int start, final int length) {
1257 return new XMLToken(source.getString(start, length));
1258 }
1259
1260 /**
1261 * Scan over a XML token.
1262 *
1263 * @return TRUE if is an XML literal.
1264 */
1265 private boolean scanXMLLiteral() {
1266 assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1267 if (XML_LITERALS) {
1268 // Record beginning of xml expression.
1269 final int start = position;
1270
1271 int openCount = 0;
1272
1273 do {
1274 if (ch0 == '<') {
1275 if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1276 skip(3);
1277 openCount--;
1278 } else if (Character.isJavaIdentifierStart(ch1)) {
1279 skip(2);
1280 openCount++;
1281 } else if (ch1 == '?') {
1282 skip(2);
1283 } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1284 skip(4);
1285 } else {
1286 reset(start);
1287 return false;
1288 }
1289
1290 while (!atEOF() && ch0 != '>') {
1291 if (ch0 == '/' && ch1 == '>') {
1292 openCount--;
1293 skip(1);
1294 break;
1295 } else if (ch0 == '\"' || ch0 == '\'') {
1296 scanString(false);
1297 } else {
1298 skip(1);
1299 }
1300 }
1301
1302 if (ch0 != '>') {
1303 reset(start);
1304 return false;
1305 }
1306
1307 skip(1);
1308 } else if (atEOF()) {
1309 reset(start);
1310 return false;
1311 } else {
1312 skip(1);
1313 }
1314 } while (openCount > 0);
1315
1316 add(XML, start);
1317 return true;
1318 }
1319
1320 return false;
1321 }
1322
1323 /**
1324 * Scan over identifier characters.
1325 *
1326 * @return Length of identifier or zero if none found.
1327 */
1328 private int scanIdentifier() {
1329 final int start = position;
1330
1331 // Make sure first character is valid start character.
1332 if (ch0 == '\\' && ch1 == 'u') {
1333 skip(2);
1334 final int ch = hexSequence(4, TokenType.IDENT);
1335
1336 if (!Character.isJavaIdentifierStart(ch)) {
1337 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1338 }
1339 } else if (!Character.isJavaIdentifierStart(ch0)) {
1340 // Not an identifier.
1341 return 0;
1342 }
1343
1344 // Make sure remaining characters are valid part characters.
1345 while (!atEOF()) {
1346 if (ch0 == '\\' && ch1 == 'u') {
1347 skip(2);
1348 final int ch = hexSequence(4, TokenType.IDENT);
1349
1350 if (!Character.isJavaIdentifierPart(ch)) {
1351 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1352 }
1353 } else if (Character.isJavaIdentifierPart(ch0)) {
1354 skip(1);
1355 } else {
1356 break;
1357 }
1358 }
1359
1360 // Length of identifier sequence.
1361 return position - start;
1362 }
1363
1364 /**
1365 * Compare two identifiers (in content) for equality.
1366 *
1367 * @param aStart Start of first identifier.
1368 * @param aLength Length of first identifier.
1369 * @param bStart Start of second identifier.
1370 * @param bLength Length of second identifier.
1371 * @return True if equal.
1372 */
1373 private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1374 if (aLength == bLength) {
1375 for (int i = 0; i < aLength; i++) {
1376 if (content[aStart + i] != content[bStart + i]) {
1377 return false;
1378 }
1379 }
1380
1381 return true;
1382 }
1383
1384 return false;
1385 }
1386
1387 /**
1388 * Detect if a line starts with a marker identifier.
1389 *
1390 * @param identStart Start of identifier.
1391 * @param identLength Length of identifier.
1392 * @return True if detected.
1393 */
1394 private boolean hasHereMarker(final int identStart, final int identLength) {
1395 // Skip any whitespace.
1396 skipWhitespace(false);
1397
1398 return identifierEqual(identStart, identLength, position, scanIdentifier());
1399 }
1400
1401 /**
1402 * Lexer to service edit strings.
1403 */
1404 private static class EditStringLexer extends Lexer {
1405 /** Type of string literals to emit. */
1406 final TokenType stringType;
1407
1408 /*
1409 * Constructor.
1410 */
1411
1412 EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1413 super(lexer, stringState);
1414
1415 this.stringType = stringType;
1416 }
1417
1418 /**
1419 * Lexify the contents of the string.
1420 */
1421 @Override
1422 public void lexify() {
1423 // Record start of string position.
1424 int stringStart = position;
1425 // Indicate that the priming first string has not been emitted.
1426 boolean primed = false;
1427
1428 while (true) {
1429 // Detect end of content.
1430 if (atEOF()) {
1431 break;
1432 }
1433
1434 // Honour escapes (should be well formed.)
1435 if (ch0 == '\\' && stringType == ESCSTRING) {
1436 skip(2);
1437
1438 continue;
1439 }
1440
1441 // If start of expression.
1442 if (ch0 == '$' && ch1 == '{') {
1443 if (!primed || stringStart != position) {
1444 if (primed) {
1445 add(ADD, stringStart, stringStart + 1);
1446 }
1447
1448 add(stringType, stringStart, position);
1449 primed = true;
1450 }
1451
1452 // Skip ${
1453 skip(2);
1454
1455 // Save expression state.
1456 final State expressionState = saveState();
1457
1458 // Start with one open brace.
1459 int braceCount = 1;
1460
1461 // Scan for the rest of the string.
1462 while (!atEOF()) {
1463 // If closing brace.
1464 if (ch0 == '}') {
1465 // Break only only if matching brace.
1466 if (--braceCount == 0) {
1467 break;
1468 }
1469 } else if (ch0 == '{') {
1470 // Bump up the brace count.
1471 braceCount++;
1472 }
1473
1474 // Skip to next character.
1475 skip(1);
1476 }
1477
1478 // If braces don't match then report an error.
1479 if (braceCount != 0) {
1480 error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1481 }
1482
1483 // Mark end of expression.
1484 expressionState.setLimit(position);
1485 // Skip closing brace.
1486 skip(1);
1487
1488 // Start next string.
1489 stringStart = position;
1490
1491 // Concatenate expression.
1492 add(ADD, expressionState.position, expressionState.position + 1);
1493 add(LPAREN, expressionState.position, expressionState.position + 1);
1494
1495 // Scan expression.
1496 final Lexer lexer = new Lexer(this, expressionState);
1497 lexer.lexify();
1498
1499 // Close out expression parenthesis.
1500 add(RPAREN, position - 1, position);
1501
1502 continue;
1503 }
1504
1505 // Next character in string.
1506 skip(1);
1507 }
1508
1509 // If there is any unemitted string portion.
1510 if (stringStart != limit) {
1511 // Concatenate remaining string.
1512 if (primed) {
1513 add(ADD, stringStart, 1);
1514 }
1515
1516 add(stringType, stringStart, limit);
1517 }
1518 }
1519
1520 }
1521
1522 /**
1523 * Edit string for nested expressions.
1524 *
1525 * @param stringType Type of string literals to emit.
1526 * @param stringState State of lexer at start of string.
1527 */
1528 private void editString(final TokenType stringType, final State stringState) {
1529 // Use special lexer to scan string.
1530 final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1531 lexer.lexify();
1532
1533 // Need to keep lexer informed.
1534 last = stringType;
1535 }
1536
1537 /**
1538 * Scan over a here string.
1539 *
1540 * @return TRUE if is a here string.
1541 */
1542 private boolean scanHereString(final LineInfoReceiver lir) {
1543 assert ch0 == '<' && ch1 == '<';
1544 if (scripting) {
1545 // Record beginning of here string.
1546 final State saved = saveState();
1547
1548 // << or <<<
1549 final boolean excludeLastEOL = ch2 != '<';
1550
1551 if (excludeLastEOL) {
1552 skip(2);
1553 } else {
1554 skip(3);
1555 }
1556
1557 // Scan identifier. It might be quoted, indicating that no string editing should take place.
1558 final char quoteChar = ch0;
1559 final boolean noStringEditing = quoteChar == '"' || quoteChar == '\'';
1560 if (noStringEditing) {
1561 skip(1);
1562 }
1563 final int identStart = position;
1564 final int identLength = scanIdentifier();
1565 if (noStringEditing) {
1566 if (ch0 != quoteChar) {
1567 error(Lexer.message("here.non.matching.delimiter"), last, position, position);
1568 restoreState(saved);
1569 return false;
1570 }
1571 skip(1);
1572 }
1573
1574 // Check for identifier.
1575 if (identLength == 0) {
1576 // Treat as shift.
1577 restoreState(saved);
1578
1579 return false;
1580 }
1581
1582 // Record rest of line.
1583 final State restState = saveState();
1584 // keep line number updated
1585 int lastLine = line;
1586
1587 skipLine(false);
1588 lastLine++;
1589 int lastLinePosition = position;
1590 restState.setLimit(position);
1591
1592 // Record beginning of string.
1593 final State stringState = saveState();
1594 int stringEnd = position;
1595
1596 // Hunt down marker.
1597 while (!atEOF()) {
1598 // Skip any whitespace.
1599 skipWhitespace(false);
1600
1601 if (hasHereMarker(identStart, identLength)) {
1602 break;
1603 }
1604
1605 skipLine(false);
1606 lastLine++;
1607 lastLinePosition = position;
1608 stringEnd = position;
1609 }
1610
1611 // notify last line information
1612 lir.lineInfo(lastLine, lastLinePosition);
1613
1614 // Record end of string.
1615 stringState.setLimit(stringEnd);
1616
1617 // If marker is missing.
1618 if (stringState.isEmpty() || atEOF()) {
1619 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1620 restoreState(saved);
1621
1622 return false;
1623 }
1624
1625 // Remove last end of line if specified.
1626 if (excludeLastEOL) {
1627 // Handles \n.
1628 if (content[stringEnd - 1] == '\n') {
1629 stringEnd--;
1630 }
1631
1632 // Handles \r and \r\n.
1633 if (content[stringEnd - 1] == '\r') {
1634 stringEnd--;
1635 }
1636
1637 // Update end of string.
1638 stringState.setLimit(stringEnd);
1639 }
1640
1641 // Edit string if appropriate.
1642 if (!noStringEditing && !stringState.isEmpty()) {
1643 editString(STRING, stringState);
1644 } else {
1645 // Add here string.
1646 add(STRING, stringState.position, stringState.limit);
1647 }
1648
1649 // Scan rest of original line.
1650 final Lexer restLexer = new Lexer(this, restState);
1651
1652 restLexer.lexify();
1653
1654 return true;
1655 }
1656
1657 return false;
1658 }
1659
1660 /**
1661 * Breaks source content down into lex units, adding tokens to the token
1662 * stream. The routine scans until the stream buffer is full. Can be called
1663 * repeatedly until EOF is detected.
1664 */
1665 public void lexify() {
1666 while (!stream.isFull() || nested) {
1667 // Skip over whitespace.
1668 skipWhitespace(true);
1669
1670 // Detect end of file.
1671 if (atEOF()) {
1672 if (!nested) {
1673 // Add an EOF token at the end.
1674 add(EOF, position);
1675 }
1676
1677 break;
1678 }
1679
1680 // Check for comments. Note that we don't scan for regexp and other literals here as
1681 // we may not have enough context to distinguish them from similar looking operators.
1682 // Instead we break on ambiguous operators below and let the parser decide.
1683 if (ch0 == '/' && skipComments()) {
1684 continue;
1685 }
1686
1687 if (scripting && ch0 == '#' && skipComments()) {
1688 continue;
1689 }
1690
1691 // TokenType for lookup of delimiter or operator.
1692 TokenType type;
1693
1694 if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1695 // '.' followed by digit.
1696 // Scan and add a number.
1697 scanNumber();
1698 } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1699 if (templateExpressionOpenBraces > 0) {
1700 if (type == LBRACE) {
1701 templateExpressionOpenBraces++;
1702 } else if (type == RBRACE) {
1703 if (--templateExpressionOpenBraces == 0) {
1704 break;
1705 }
1706 }
1707 }
1708
1709 // Get the number of characters in the token.
1710 final int typeLength = type.getLength();
1711 // Skip that many characters.
1712 skip(typeLength);
1713 // Add operator token.
1714 add(type, position - typeLength);
1715 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1716 // We break to let the parser decide what it is.
1717 if (canStartLiteral(type)) {
1718 break;
1719 } else if (type == LBRACE && pauseOnNextLeftBrace) {
1720 pauseOnNextLeftBrace = false;
1721 break;
1722 }
1723 } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1724 // Scan and add identifier or keyword.
1725 scanIdentifierOrKeyword();
1726 } else if (isStringDelimiter(ch0)) {
1727 // Scan and add a string.
1728 scanString(true);
1729 } else if (Character.isDigit(ch0)) {
1730 // Scan and add a number.
1731 scanNumber();
1732 } else if (isTemplateDelimiter(ch0) && es6) {
1733 // Scan and add template in ES6 mode.
1734 scanTemplate();
1735 } else if (isTemplateDelimiter(ch0) && scripting) {
1736 // Scan and add an exec string ('`') in scripting mode.
1737 scanString(true);
1738 } else {
1739 // Don't recognize this character.
1740 skip(1);
1741 add(ERROR, position - 1);
1742 }
1743 }
1744 }
1745
1746 /**
1747 * Return value of token given its token descriptor.
1748 *
1749 * @param token Token descriptor.
1750 * @return JavaScript value.
1751 */
1752 Object getValueOf(final long token, final boolean strict) {
1753 final int start = Token.descPosition(token);
1754 final int len = Token.descLength(token);
1755
1756 switch (Token.descType(token)) {
1757 case DECIMAL:
1758 return Lexer.valueOf(source.getString(start, len), 10); // number
1759 case HEXADECIMAL:
1760 return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1761 case OCTAL_LEGACY:
1762 return Lexer.valueOf(source.getString(start, len), 8); // number
1763 case OCTAL:
1764 return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number
1765 case BINARY_NUMBER:
1766 return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number
1767 case FLOATING:
1768 final String str = source.getString(start, len);
1769 final double value = Double.valueOf(str);
1770 if (str.indexOf('.') != -1) {
1771 return value; //number
1772 }
1773 //anything without an explicit decimal point is still subject to a
1774 //"representable as int or long" check. Then the programmer does not
1775 //explicitly code something as a double. For example new Color(int, int, int)
1776 //and new Color(float, float, float) will get ambiguous for cases like
1777 //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1778 //yet we don't want e.g. 1e6 to be a double unnecessarily
1779 if (JSType.isStrictlyRepresentableAsInt(value)) {
1780 return (int)value;
1781 }
1782 return value;
1783 case STRING:
1784 return source.getString(start, len); // String
1785 case ESCSTRING:
1786 return valueOfString(start, len, strict); // String
1787 case IDENT:
1788 return valueOfIdent(start, len); // String
1789 case REGEX:
1790 return valueOfPattern(start, len); // RegexToken::LexerToken
1791 case TEMPLATE:
1792 case TEMPLATE_HEAD:
1793 case TEMPLATE_MIDDLE:
1794 case TEMPLATE_TAIL:
1795 return valueOfString(start, len, true); // String
1796 case XML:
1797 return valueOfXML(start, len); // XMLToken::LexerToken
1798 case DIRECTIVE_COMMENT:
1799 return source.getString(start, len);
1800 default:
1801 break;
1802 }
1803
1804 return null;
1805 }
1806
1807 /**
1808 * Get the raw string value of a template literal string part.
1809 *
1810 * @param token template string token
1811 * @return raw string
1812 */
1813 public String valueOfRawString(final long token) {
1814 final int start = Token.descPosition(token);
1815 final int length = Token.descLength(token);
1816
1817 // Save the current position.
1818 final int savePosition = position;
1819 // Calculate the end position.
1820 final int end = start + length;
1821 // Reset to beginning of string.
1822 reset(start);
1823
1824 // Buffer for recording characters.
1825 final StringBuilder sb = new StringBuilder(length);
1826
1827 // Scan until end of string.
1828 while (position < end) {
1829 if (ch0 == '\r') {
1830 // Convert CR-LF or CR to LF line terminator.
1831 sb.append('\n');
1832 skip(ch1 == '\n' ? 2 : 1);
1833 } else {
1834 // Add regular character.
1835 sb.append(ch0);
1836 skip(1);
1837 }
1838 }
1839
1840 // Restore position.
1841 reset(savePosition);
1842
1843 return sb.toString();
1844 }
1845
1846 /**
1847 * Get the correctly localized error message for a given message id format arguments
1848 * @param msgId message id
1849 * @param args format arguments
1850 * @return message
1851 */
1852 protected static String message(final String msgId, final String... args) {
1853 return ECMAErrors.getMessage("lexer.error." + msgId, args);
1854 }
1855
1856 /**
1857 * Generate a runtime exception
1858 *
1859 * @param message error message
1860 * @param type token type
1861 * @param start start position of lexed error
1862 * @param length length of lexed error
1863 * @throws ParserException unconditionally
1864 */
1865 protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1866 final long token = Token.toDesc(type, start, length);
1867 final int pos = Token.descPosition(token);
1868 final int lineNum = source.getLine(pos);
1869 final int columnNum = source.getColumn(pos);
1870 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1871 throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1872 }
1873
1874 /**
1875 * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1876 * This is the abstract superclass
1877 */
1878 public static abstract class LexerToken implements Serializable {
1879 private static final long serialVersionUID = 1L;
1880
1881 private final String expression;
1882
1883 /**
1884 * Constructor
1885 * @param expression token expression
1886 */
1887 protected LexerToken(final String expression) {
1888 this.expression = expression;
1889 }
1890
1891 /**
1892 * Get the expression
1893 * @return expression
1894 */
1895 public String getExpression() {
1896 return expression;
1897 }
1898 }
1899
1900 /**
1901 * Temporary container for regular expressions.
1902 */
1903 public static class RegexToken extends LexerToken {
1904 private static final long serialVersionUID = 1L;
1905
1906 /** Options. */
1907 private final String options;
1908
1909 /**
1910 * Constructor.
1911 *
1912 * @param expression regexp expression
1913 * @param options regexp options
1914 */
1915 public RegexToken(final String expression, final String options) {
1916 super(expression);
1917 this.options = options;
1918 }
1919
1920 /**
1921 * Get regexp options
1922 * @return options
1923 */
1924 public String getOptions() {
1925 return options;
1926 }
1927
1928 @Override
1929 public String toString() {
1930 return '/' + getExpression() + '/' + options;
1931 }
1932 }
1933
1934 /**
1935 * Temporary container for XML expression.
1936 */
1937 public static class XMLToken extends LexerToken {
1938 private static final long serialVersionUID = 1L;
1939
1940 /**
1941 * Constructor.
1942 *
1943 * @param expression XML expression
1944 */
1945 public XMLToken(final String expression) {
1946 super(expression);
1947 }
1948 }
1949 }
--- EOF ---