1 /*
2 * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.tools.javac.parser;
27
28 import com.sun.tools.javac.code.Lint;
29 import com.sun.tools.javac.code.Lint.LintCategory;
30 import com.sun.tools.javac.code.Preview;
31 import com.sun.tools.javac.code.Source;
32 import com.sun.tools.javac.code.Source.Feature;
33 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
34 import com.sun.tools.javac.resources.CompilerProperties.Errors;
35 import com.sun.tools.javac.resources.CompilerProperties.Warnings;
36 import com.sun.tools.javac.util.*;
37 import com.sun.tools.javac.util.JCDiagnostic.*;
38
39 import java.lang.reflect.InvocationTargetException;
40 import java.lang.reflect.Method;
41 import java.nio.CharBuffer;
42 import java.util.HashSet;
43 import java.util.Set;
44
45 import static com.sun.tools.javac.parser.Tokens.*;
46 import static com.sun.tools.javac.util.LayoutCharacters.*;
47
48 /** The lexical analyzer maps an input stream consisting of
49 * ASCII characters and Unicode escapes into a token sequence.
50 *
51 * <p><b>This is NOT part of any supported API.
52 * If you write code that depends on this, you do so at your own risk.
53 * This code and its internal interfaces are subject to change or
54 * deletion without notice.</b>
55 */
56 public class JavaTokenizer {
57
/** Compile-time debug switch; when true the scanning callbacks and
 *  readToken() print trace output to System.out.
 */
private static final boolean scannerDebug = false;

/** The source language setting, taken from the scanner factory.
 */
private Source source;

/** The preview language setting, taken from the scanner factory. */
private Preview preview;

/** The log to be used for error reporting.
 */
private final Log log;

/** The token factory. */
private final Tokens tokens;

/** The kind of the token being scanned; set by readToken() and its helpers,
 *  and forced to ERROR by lexError().
 */
protected TokenKind tk;

/** The radix of the numeric token being scanned; reset to 0 at the start of
 *  readToken() and set by the number scanning helpers.
 */
protected int radix;

/** The name of the token being scanned; reset to null at the start of
 *  readToken() and set when an identifier is scanned.
 */
protected Name name;

/** The position where the most recent lexical error occurred,
 *  or Position.NOPOS if none has been recorded.
 */
protected int errPos = Position.NOPOS;

/** The Unicode reader (low-level stream reader).
 */
protected UnicodeReader reader;

/** True if the string token most recently scanned by scanString() is a
 *  text block (opened with three double quotes).
 */
protected boolean isTextBlock;

/** True if the string token most recently scanned by scanString()
 *  contains at least one backslash escape sequence.
 */
protected boolean hasEscapeSequences;

/** The factory that created this tokenizer; also used to create the
 *  readers for scanned comments.
 */
protected ScannerFactory fac;

// The set of lint options currently in effect. In this class it is
// initialized from the scanner factory.
// NOTE(review): the original comment said it "is set/reset as needed by
// Attr as it visits all the various parts of the trees during
// attribution"; nothing in this file does that -- confirm or drop.
protected Lint lint;
108
109 private static final boolean hexFloatsWork = hexFloatsWork();
110 private static boolean hexFloatsWork() {
111 try {
112 Float.valueOf("0x1.0p1");
113 return true;
114 } catch (NumberFormatException ex) {
115 return false;
116 }
117 }
118
/**
 * Create a tokenizer that reads from the given character buffer.
 * The buffer is wrapped in a new {@code UnicodeReader}, and construction
 * is delegated to {@link #JavaTokenizer(ScannerFactory, UnicodeReader)}.
 *
 * @param fac the factory which created this Scanner
 * @param buf the input; NOTE(review): the original comment said the input
 *            "might be modified" -- that depends on UnicodeReader, confirm
 */
protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
    this(fac, new UnicodeReader(fac, buf));
}
132
/**
 * Create a tokenizer that reads from the given character array.
 * The array is wrapped in a new {@code UnicodeReader}, and construction
 * is delegated to {@link #JavaTokenizer(ScannerFactory, UnicodeReader)}.
 *
 * @param fac the factory which created this Scanner
 * @param buf the input characters
 * @param inputLength the number of characters of {@code buf} to scan;
 *        presumably positive and no greater than {@code buf.length}
 *        (enforced, if at all, by UnicodeReader -- confirm)
 */
protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
    this(fac, new UnicodeReader(fac, buf, inputLength));
}
136
/**
 * Create a tokenizer on top of an existing reader, taking the log, token
 * table, source level, preview settings and lint settings from the factory.
 *
 * @param fac    the factory which created this Scanner
 * @param reader the low-level character reader to scan from
 */
protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
    // Input.
    this.reader = reader;
    this.fac = fac;
    // Services and settings shared through the factory.
    this.tokens = fac.tokens;
    this.log = fac.log;
    this.lint = fac.lint;
    this.preview = fac.preview;
    this.source = fac.source;
}
146
147 protected void checkSourceLevel(int pos, Feature feature) {
148 if (preview.isPreview(feature) && !preview.isEnabled()) {
149 //preview feature without --preview flag, error
150 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
151 } else if (!feature.allowedInSource(source)) {
152 //incompatible source level, error
153 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
154 } else if (preview.isPreview(feature)) {
155 //use of preview feature, warn
156 preview.warnPreview(pos, feature);
157 }
158 }
159
160 /** Report an error at the given position using the provided arguments.
161 */
162 protected void lexError(int pos, JCDiagnostic.Error key) {
163 log.error(pos, key);
164 tk = TokenKind.ERROR;
165 errPos = pos;
166 }
167
168 protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
169 log.error(flags, pos, key);
170 tk = TokenKind.ERROR;
171 errPos = pos;
172 }
173
174 protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
175 DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
176 log.warning(lc, dp, key);
177 }
178
/** Read next character in character or string literal and copy into sbuf.
 *  If the current character is a backslash, the escape sequence that it
 *  starts is consumed; an unrecognized escape is reported as
 *  Errors.IllegalEscChar.
 *
 *  @param pos start of literal offset
 *  @param translateEscapesNow true if String::translateEscapes is not
 *         available in the java.base libs (occurs during bootstrapping);
 *         escapes are then replaced by their values here, otherwise the
 *         escape's own characters are kept for later translation
 *  @param multiline true if scanning a text block; allows a backslash
 *         followed by a line terminator
 */
private void scanLitChar(int pos, boolean translateEscapesNow, boolean multiline) {
    if (reader.ch == '\\') {
        // Two literal backslashes in a row (the second one not produced by
        // a unicode escape).
        if (reader.peekChar() == '\\' && !reader.isUnicode()) {
            reader.skipChar();
            if (!translateEscapesNow) {
                // Deferred translation: keep both backslashes.
                reader.putChar(false);
            }
            reader.putChar(true);
        } else {
            // NOTE(review): nextChar(flag) presumably keeps the backslash in
            // the buffer unless flag is true -- confirm in UnicodeReader.
            reader.nextChar(translateEscapesNow);
            switch (reader.ch) {
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
                // Octal escape: up to three digits, the third only being
                // taken when the leading digit is 0-3.
                char leadch = reader.ch;
                int oct = reader.digit(pos, 8);
                reader.nextChar(translateEscapesNow);
                if ('0' <= reader.ch && reader.ch <= '7') {
                    oct = oct * 8 + reader.digit(pos, 8);
                    reader.nextChar(translateEscapesNow);
                    if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
                        oct = oct * 8 + reader.digit(pos, 8);
                        reader.nextChar(translateEscapesNow);
                    }
                }
                if (translateEscapesNow) {
                    reader.putChar((char)oct);
                }
                break;
            // Single-letter escapes: store either the translated value or
            // the letter itself, depending on translateEscapesNow.
            case 'b':
                reader.putChar(translateEscapesNow ? '\b' : 'b', true); break;
            case 't':
                reader.putChar(translateEscapesNow ? '\t' : 't', true); break;
            case 'n':
                reader.putChar(translateEscapesNow ? '\n' : 'n', true); break;
            case 'f':
                reader.putChar(translateEscapesNow ? '\f' : 'f', true); break;
            case 'r':
                reader.putChar(translateEscapesNow ? '\r' : 'r', true); break;
            case '\'':
            case '\"':
            case '\\':
                reader.putChar(true); break;
            case 's':
                // The space escape is checked against the TEXT_BLOCKS feature.
                checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
                reader.putChar(translateEscapesNow ? ' ' : 's', true); break;
            case '\n':
            case '\r':
                // Backslash followed by a line terminator: only legal in a
                // text block.
                if (!multiline) {
                    lexError(reader.bp, Errors.IllegalEscChar);
                } else {
                    checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
                    int start = reader.bp;
                    // Treat CR LF as a single line terminator.
                    if (reader.ch == '\r' && reader.peekChar() == '\n') {
                        reader.nextChar(translateEscapesNow);
                    }
                    reader.nextChar(translateEscapesNow);
                    processLineTerminator(start, reader.bp);
                }
                break;
            default:
                lexError(reader.bp, Errors.IllegalEscChar);
            }
        }
    } else if (reader.bp != reader.buflen) {
        // Ordinary character (and not at end of input): copy it through.
        reader.putChar(true);
    }
}
253
254 /** Interim access to String methods used to support text blocks.
255 * Required to handle bootstrapping with pre-text block jdks.
256 * Should be replaced with direct calls in the 'next' jdk.
257 */
258 static class TextBlockSupport {
259 /** Reflection method to remove incidental indentation.
260 */
261 private static final Method stripIndent;
262
263 /** Reflection method to translate escape sequences.
264 */
265 private static final Method translateEscapes;
266
267 /** true if stripIndent and translateEscapes are available in the bootstrap jdk.
268 */
269 private static final boolean hasSupport;
270
271 /** Get a string method via refection or null if not available.
272 */
273 private static Method getStringMethodOrNull(String name) {
274 try {
275 return String.class.getMethod(name);
276 } catch (Exception ex) {
277 // Method not available, return null.
278 }
279 return null;
280 }
281
282 static {
283 // Get text block string methods.
284 stripIndent = getStringMethodOrNull("stripIndent");
285 translateEscapes = getStringMethodOrNull("translateEscapes");
286 // true if stripIndent and translateEscapes are available in the bootstrap jdk.
287 hasSupport = stripIndent != null && translateEscapes != null;
288 }
289
290 /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
291 */
292 static boolean hasSupport() {
293 return hasSupport;
294 }
295
296 /** Return the leading whitespace count (indentation) of the line.
297 */
298 private static int indent(String line) {
299 return line.length() - line.stripLeading().length();
300 }
301
302 enum WhitespaceChecks {
303 INCONSISTENT,
304 TRAILING
305 };
306
307 /** Check that the use of white space in content is not problematic.
308 */
309 static Set<WhitespaceChecks> checkWhitespace(String string) {
310 // Start with empty result set.
311 Set<WhitespaceChecks> checks = new HashSet<>();
312 // No need to check empty strings.
313 if (string.isEmpty()) {
314 return checks;
315 }
316 // Maximum common indentation.
317 int outdent = 0;
318 // No need to check indentation if opting out (last line is empty.)
319 char lastChar = string.charAt(string.length() - 1);
320 boolean optOut = lastChar == '\n' || lastChar == '\r';
321 // Split string based at line terminators.
322 String[] lines = string.split("\\R");
323 int length = lines.length;
324 // Extract last line.
325 String lastLine = length == 0 ? "" : lines[length - 1];
326 if (!optOut) {
327 // Prime with the last line indentation (may be blank.)
328 outdent = indent(lastLine);
329 for (String line : lines) {
330 // Blanks lines have no influence (last line accounted for.)
331 if (!line.isBlank()) {
332 outdent = Integer.min(outdent, indent(line));
333 if (outdent == 0) {
334 break;
335 }
336 }
337 }
338 }
339 // Last line is representative.
340 String start = lastLine.substring(0, outdent);
341 for (String line : lines) {
342 // Fail if a line does not have the same indentation.
343 if (!line.isBlank() && !line.startsWith(start)) {
344 // Mix of different white space
345 checks.add(WhitespaceChecks.INCONSISTENT);
346 }
347 // Line has content even after indent is removed.
348 if (outdent < line.length()) {
349 // Is the last character a white space.
350 lastChar = line.charAt(line.length() - 1);
351 if (Character.isWhitespace(lastChar)) {
352 // Has trailing white space.
353 checks.add(WhitespaceChecks.TRAILING);
354 }
355 }
356 }
357 return checks;
358 }
359
360 /** Invoke String::stripIndent through reflection.
361 */
362 static String stripIndent(String string) {
363 try {
364 string = (String)stripIndent.invoke(string);
365 } catch (InvocationTargetException | IllegalAccessException ex) {
366 throw new RuntimeException(ex);
367 }
368 return string;
369 }
370
371 /** Invoke String::translateEscapes through reflection.
372 */
373 static String translateEscapes(String string) {
374 try {
375 string = (String)translateEscapes.invoke(string);
376 } catch (InvocationTargetException | IllegalAccessException ex) {
377 throw new RuntimeException(ex);
378 }
379 return string;
380 }
381 }
382
383 /** Test for EOLN.
384 */
385 private boolean isEOLN() {
386 return reader.ch == LF || reader.ch == CR;
387 }
388
389 /** Test for CRLF.
390 */
391 private boolean isCRLF() {
392 return reader.ch == CR && reader.peekChar() == LF;
393 }
394
395 /** Count and skip repeated occurrences of the specified character.
396 */
397 private int countChar(char ch, int max) {
398 int count = 0;
399 for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
400 reader.scanChar();
401 }
402 return count;
403 }
404
405 /** Skip and process a line terminator.
406 */
407 private void skipLineTerminator() {
408 int start = reader.bp;
409 if (isCRLF()) {
410 reader.scanChar();
411 }
412 reader.scanChar();
413 processLineTerminator(start, reader.bp);
414 }
415
416 /** Scan a string literal or text block.
417 */
418 private void scanString(int pos) {
419 // Clear flags.
420 isTextBlock = false;
421 hasEscapeSequences = false;
422 // Track the end of first line for error recovery.
423 int firstEOLN = -1;
424 // Attempt to scan for up to 3 double quotes.
425 int openCount = countChar('\"', 3);
426 switch (openCount) {
427 case 1: // Starting a string literal.
428 break;
429 case 2: // Starting an empty string literal.
430 tk = Tokens.TokenKind.STRINGLITERAL;
431 return;
432 case 3: // Starting a text block.
433 // Check if preview feature is enabled for text blocks.
434 checkSourceLevel(pos, Feature.TEXT_BLOCKS);
435 isTextBlock = true;
436 // Verify the open delimiter sequence.
437 while (reader.bp < reader.buflen) {
438 char ch = reader.ch;
439 if (ch != ' ' && ch != '\t' && ch != FF) {
440 break;
441 }
442 reader.scanChar();
443 }
444 if (isEOLN()) {
445 skipLineTerminator();
446 } else {
447 // Error if the open delimiter sequence is not
448 // """<white space>*<LineTerminator>.
449 lexError(reader.bp, Errors.IllegalTextBlockOpen);
450 return;
451 }
452 break;
453 }
454 // While characters are available.
455 while (reader.bp < reader.buflen) {
456 // If possible close delimiter sequence.
457 if (reader.ch == '\"') {
458 // Check to see if enough double quotes are present.
459 int closeCount = countChar('\"', openCount);
460 if (openCount == closeCount) {
461 // Good result.
462 tk = Tokens.TokenKind.STRINGLITERAL;
463 return;
464 }
465 // False alarm, add double quotes to string buffer.
466 reader.repeat('\"', closeCount);
467 } else if (isEOLN()) {
468 // Line terminator in string literal is an error.
469 // Fall out to unclosed string literal error.
470 if (openCount == 1) {
471 break;
472 }
473 skipLineTerminator();
474 // Add line terminator to string buffer.
475 reader.putChar('\n', false);
476 // Record first line terminator for error recovery.
477 if (firstEOLN == -1) {
478 firstEOLN = reader.bp;
479 }
480 } else if (reader.ch == '\\') {
481 // Handle escape sequences.
482 hasEscapeSequences = true;
483 // Translate escapes immediately if TextBlockSupport is not available
484 // during bootstrapping.
485 boolean translateEscapesNow = !TextBlockSupport.hasSupport();
486 scanLitChar(pos, translateEscapesNow, openCount != 1);
487 } else {
488 // Add character to string buffer.
489 reader.putChar(true);
490 }
491 }
492 // String ended without close delimiter sequence.
493 lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
494 if (firstEOLN != -1) {
495 // Reset recovery position to point after open delimiter sequence.
496 reader.reset(firstEOLN);
497 }
498 }
499
500 private void scanDigits(int pos, int digitRadix) {
501 char saveCh;
502 int savePos;
503 do {
504 if (reader.ch != '_') {
505 reader.putChar(false);
506 }
507 saveCh = reader.ch;
508 savePos = reader.bp;
509 reader.scanChar();
510 } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
511 if (saveCh == '_')
512 lexError(savePos, Errors.IllegalUnderscore);
513 }
514
515 /** Read fractional part of hexadecimal floating point number.
516 */
517 private void scanHexExponentAndSuffix(int pos) {
518 if (reader.ch == 'p' || reader.ch == 'P') {
519 reader.putChar(true);
520 skipIllegalUnderscores();
521 if (reader.ch == '+' || reader.ch == '-') {
522 reader.putChar(true);
523 }
524 skipIllegalUnderscores();
525 if (reader.digit(pos, 10) >= 0) {
526 scanDigits(pos, 10);
527 if (!hexFloatsWork)
528 lexError(pos, Errors.UnsupportedCrossFpLit);
529 } else
530 lexError(pos, Errors.MalformedFpLit);
531 } else {
532 lexError(pos, Errors.MalformedFpLit);
533 }
534 if (reader.ch == 'f' || reader.ch == 'F') {
535 reader.putChar(true);
536 tk = TokenKind.FLOATLITERAL;
537 radix = 16;
538 } else {
539 if (reader.ch == 'd' || reader.ch == 'D') {
540 reader.putChar(true);
541 }
542 tk = TokenKind.DOUBLELITERAL;
543 radix = 16;
544 }
545 }
546
547 /** Read fractional part of floating point number.
548 */
549 private void scanFraction(int pos) {
550 skipIllegalUnderscores();
551 if (reader.digit(pos, 10) >= 0) {
552 scanDigits(pos, 10);
553 }
554 int sp1 = reader.sp;
555 if (reader.ch == 'e' || reader.ch == 'E') {
556 reader.putChar(true);
557 skipIllegalUnderscores();
558 if (reader.ch == '+' || reader.ch == '-') {
559 reader.putChar(true);
560 }
561 skipIllegalUnderscores();
562 if (reader.digit(pos, 10) >= 0) {
563 scanDigits(pos, 10);
564 return;
565 }
566 lexError(pos, Errors.MalformedFpLit);
567 reader.sp = sp1;
568 }
569 }
570
571 /** Read fractional part and 'd' or 'f' suffix of floating point number.
572 */
573 private void scanFractionAndSuffix(int pos) {
574 radix = 10;
575 scanFraction(pos);
576 if (reader.ch == 'f' || reader.ch == 'F') {
577 reader.putChar(true);
578 tk = TokenKind.FLOATLITERAL;
579 } else {
580 if (reader.ch == 'd' || reader.ch == 'D') {
581 reader.putChar(true);
582 }
583 tk = TokenKind.DOUBLELITERAL;
584 }
585 }
586
587 /** Read fractional part and 'd' or 'f' suffix of floating point number.
588 */
589 private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
590 radix = 16;
591 Assert.check(reader.ch == '.');
592 reader.putChar(true);
593 skipIllegalUnderscores();
594 if (reader.digit(pos, 16) >= 0) {
595 seendigit = true;
596 scanDigits(pos, 16);
597 }
598 if (!seendigit)
599 lexError(pos, Errors.InvalidHexNumber);
600 else
601 scanHexExponentAndSuffix(pos);
602 }
603
604 private void skipIllegalUnderscores() {
605 if (reader.ch == '_') {
606 lexError(reader.bp, Errors.IllegalUnderscore);
607 while (reader.ch == '_')
608 reader.scanChar();
609 }
610 }
611
612 /** Read a number.
613 * @param radix The radix of the number; one of 2, 8, 10, 16.
614 */
615 private void scanNumber(int pos, int radix) {
616 // for octal, allow base-10 digit in case it's a float literal
617 this.radix = radix;
618 int digitRadix = (radix == 8 ? 10 : radix);
619 int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
620 boolean seendigit = firstDigit >= 0;
621 boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
622 if (seendigit) {
623 scanDigits(pos, digitRadix);
624 }
625 if (radix == 16 && reader.ch == '.') {
626 scanHexFractionAndSuffix(pos, seendigit);
627 } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
628 scanHexExponentAndSuffix(pos);
629 } else if (digitRadix == 10 && reader.ch == '.') {
630 reader.putChar(true);
631 scanFractionAndSuffix(pos);
632 } else if (digitRadix == 10 &&
633 (reader.ch == 'e' || reader.ch == 'E' ||
634 reader.ch == 'f' || reader.ch == 'F' ||
635 reader.ch == 'd' || reader.ch == 'D')) {
636 scanFractionAndSuffix(pos);
637 } else {
638 if (!seenValidDigit) {
639 switch (radix) {
640 case 2:
641 lexError(pos, Errors.InvalidBinaryNumber);
642 break;
643 case 16:
644 lexError(pos, Errors.InvalidHexNumber);
645 break;
646 }
647 }
648 if (reader.ch == 'l' || reader.ch == 'L') {
649 reader.scanChar();
650 tk = TokenKind.LONGLITERAL;
651 } else {
652 tk = TokenKind.INTLITERAL;
653 }
654 }
655 }
656
657 /** Read an identifier.
658 */
659 private void scanIdent() {
660 boolean isJavaIdentifierPart;
661 char high;
662 reader.putChar(true);
663 do {
664 switch (reader.ch) {
665 case 'A': case 'B': case 'C': case 'D': case 'E':
666 case 'F': case 'G': case 'H': case 'I': case 'J':
667 case 'K': case 'L': case 'M': case 'N': case 'O':
668 case 'P': case 'Q': case 'R': case 'S': case 'T':
669 case 'U': case 'V': case 'W': case 'X': case 'Y':
670 case 'Z':
671 case 'a': case 'b': case 'c': case 'd': case 'e':
672 case 'f': case 'g': case 'h': case 'i': case 'j':
673 case 'k': case 'l': case 'm': case 'n': case 'o':
674 case 'p': case 'q': case 'r': case 's': case 't':
675 case 'u': case 'v': case 'w': case 'x': case 'y':
676 case 'z':
677 case '$': case '_':
678 case '0': case '1': case '2': case '3': case '4':
679 case '5': case '6': case '7': case '8': case '9':
680 break;
681 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
682 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
683 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
684 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
685 case '\u0015': case '\u0016': case '\u0017':
686 case '\u0018': case '\u0019': case '\u001B':
687 case '\u007F':
688 reader.scanChar();
689 continue;
690 case '\u001A': // EOI is also a legal identifier part
691 if (reader.bp >= reader.buflen) {
692 name = reader.name();
693 tk = tokens.lookupKind(name);
694 return;
695 }
696 reader.scanChar();
697 continue;
698 default:
699 if (reader.ch < '\u0080') {
700 // all ASCII range chars already handled, above
701 isJavaIdentifierPart = false;
702 } else {
703 if (Character.isIdentifierIgnorable(reader.ch)) {
704 reader.scanChar();
705 continue;
706 } else {
707 int codePoint = reader.peekSurrogates();
708 if (codePoint >= 0) {
709 if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
710 reader.putChar(true);
711 }
712 } else {
713 isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
714 }
715 }
716 }
717 if (!isJavaIdentifierPart) {
718 name = reader.name();
719 tk = tokens.lookupKind(name);
720 return;
721 }
722 }
723 reader.putChar(true);
724 } while (true);
725 }
726
727 /** Return true if reader.ch can be part of an operator.
728 */
729 private boolean isSpecial(char ch) {
730 switch (ch) {
731 case '!': case '%': case '&': case '*': case '?':
732 case '+': case '-': case ':': case '<': case '=':
733 case '>': case '^': case '|': case '~':
734 case '@':
735 return true;
736 default:
737 return false;
738 }
739 }
740
741 /** Read longest possible sequence of special characters and convert
742 * to token.
743 */
744 private void scanOperator() {
745 while (true) {
746 reader.putChar(false);
747 Name newname = reader.name();
748 TokenKind tk1 = tokens.lookupKind(newname);
749 if (tk1 == TokenKind.IDENTIFIER) {
750 reader.sp--;
751 break;
752 }
753 tk = tk1;
754 reader.scanChar();
755 if (!isSpecial(reader.ch)) break;
756 }
757 }
758
/** Read the next token from the input.
 *  White space, line terminators and comments in front of the token are
 *  consumed first; scanned comments are collected and attached to the
 *  returned token.
 *
 *  @return the token read; its kind is ERROR after a lexical error and
 *          EOF at the end of the input
 */
public Token readToken() {

    // Reset the per-token state.
    reader.sp = 0;
    name = null;
    radix = 0;

    int pos = 0;
    int endPos = 0;
    List<Comment> comments = null;

    try {
        // Each iteration handles one lexical element. White space, line
        // terminators and comments 'break' the switch and loop again;
        // anything that yields a token leaves with 'break loop'.
        loop: while (true) {
            pos = reader.bp;
            switch (reader.ch) {
            case ' ': // (Spec 3.6)
            case '\t': // (Spec 3.6)
            case FF: // (Spec 3.6)
                do {
                    reader.scanChar();
                } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                processWhiteSpace(pos, reader.bp);
                break;
            case LF: // (Spec 3.4)
                reader.scanChar();
                processLineTerminator(pos, reader.bp);
                break;
            case CR: // (Spec 3.4)
                reader.scanChar();
                if (reader.ch == LF) {
                    reader.scanChar();
                }
                processLineTerminator(pos, reader.bp);
                break;
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '$': case '_':
                scanIdent();
                break loop;
            case '0':
                // Leading zero: hexadecimal (0x), binary (0b) or octal.
                reader.scanChar();
                if (reader.ch == 'x' || reader.ch == 'X') {
                    reader.scanChar();
                    skipIllegalUnderscores();
                    scanNumber(pos, 16);
                } else if (reader.ch == 'b' || reader.ch == 'B') {
                    reader.scanChar();
                    skipIllegalUnderscores();
                    scanNumber(pos, 2);
                } else {
                    reader.putChar('0');
                    if (reader.ch == '_') {
                        // Underscores after the leading zero are only
                        // legal when a digit follows them.
                        int savePos = reader.bp;
                        do {
                            reader.scanChar();
                        } while (reader.ch == '_');
                        if (reader.digit(pos, 10) < 0) {
                            lexError(savePos, Errors.IllegalUnderscore);
                        }
                    }
                    scanNumber(pos, 8);
                }
                break loop;
            case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                scanNumber(pos, 10);
                break loop;
            case '.':
                // A floating point literal, an ellipsis, or a plain dot.
                reader.scanChar();
                if (reader.digit(pos, 10) >= 0) {
                    reader.putChar('.');
                    scanFractionAndSuffix(pos);
                } else if (reader.ch == '.') {
                    int savePos = reader.bp;
                    reader.putChar('.'); reader.putChar('.', true);
                    if (reader.ch == '.') {
                        reader.scanChar();
                        reader.putChar('.');
                        tk = TokenKind.ELLIPSIS;
                    } else {
                        // Exactly two dots are not a token.
                        lexError(savePos, Errors.IllegalDot);
                    }
                } else {
                    tk = TokenKind.DOT;
                }
                break loop;
            case ',':
                reader.scanChar(); tk = TokenKind.COMMA; break loop;
            case ';':
                reader.scanChar(); tk = TokenKind.SEMI; break loop;
            case '(':
                reader.scanChar(); tk = TokenKind.LPAREN; break loop;
            case ')':
                reader.scanChar(); tk = TokenKind.RPAREN; break loop;
            case '[':
                reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
            case ']':
                reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
            case '{':
                reader.scanChar(); tk = TokenKind.LBRACE; break loop;
            case '}':
                reader.scanChar(); tk = TokenKind.RBRACE; break loop;
            case '/':
                // A line comment, a block or javadoc comment, '/=' or '/'.
                reader.scanChar();
                if (reader.ch == '/') {
                    do {
                        reader.scanCommentChar();
                    } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                    // A line comment that runs to the end of the input
                    // is not recorded.
                    if (reader.bp < reader.buflen) {
                        comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                    }
                    break;
                } else if (reader.ch == '*') {
                    boolean isEmpty = false;
                    reader.scanChar();
                    CommentStyle style;
                    if (reader.ch == '*') {
                        style = CommentStyle.JAVADOC;
                        reader.scanCommentChar();
                        if (reader.ch == '/') {
                            // Slash, star, star, slash: an empty comment.
                            isEmpty = true;
                        }
                    } else {
                        style = CommentStyle.BLOCK;
                    }
                    // Look for the closing star-slash.
                    while (!isEmpty && reader.bp < reader.buflen) {
                        if (reader.ch == '*') {
                            reader.scanChar();
                            if (reader.ch == '/') break;
                        } else {
                            reader.scanCommentChar();
                        }
                    }
                    if (reader.ch == '/') {
                        reader.scanChar();
                        comments = addComment(comments, processComment(pos, reader.bp, style));
                        break;
                    } else {
                        lexError(pos, Errors.UnclosedComment);
                        break loop;
                    }
                } else if (reader.ch == '=') {
                    tk = TokenKind.SLASHEQ;
                    reader.scanChar();
                } else {
                    tk = TokenKind.SLASH;
                }
                break loop;
            case '\'':
                // Character literal.
                reader.scanChar();
                if (reader.ch == '\'') {
                    lexError(pos, Errors.EmptyCharLit);
                    reader.scanChar();
                } else {
                    if (isEOLN())
                        lexError(pos, Errors.IllegalLineEndInCharLit);
                    // Escapes in character literals are always translated
                    // immediately.
                    scanLitChar(pos, true, false);
                    if (reader.ch == '\'') {
                        reader.scanChar();
                        tk = TokenKind.CHARLITERAL;
                    } else {
                        lexError(pos, Errors.UnclosedCharLit);
                    }
                }
                break loop;
            case '\"':
                scanString(pos);
                break loop;
            default:
                if (isSpecial(reader.ch)) {
                    scanOperator();
                } else {
                    boolean isJavaIdentifierStart;
                    int codePoint = -1;
                    if (reader.ch < '\u0080') {
                        // all ASCII range chars already handled, above
                        isJavaIdentifierStart = false;
                    } else {
                        codePoint = reader.peekSurrogates();
                        if (codePoint >= 0) {
                            // Surrogate pair: store the high surrogate
                            // before scanIdent stores the low one.
                            if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
                                reader.putChar(true);
                            }
                        } else {
                            isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                        }
                    }
                    if (isJavaIdentifierStart) {
                        scanIdent();
                    } else if (reader.digit(pos, 10) >= 0) {
                        scanNumber(pos, 10);
                    } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                        tk = TokenKind.EOF;
                        pos = reader.realLength;
                    } else {
                        // Illegal character: build a printable form of it
                        // for the error message.
                        String arg;

                        if (codePoint >= 0) {
                            char high = reader.ch;
                            reader.scanChar();
                            arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
                        } else {
                            arg = (32 < reader.ch && reader.ch < 127) ?
                                            String.format("%s", reader.ch) :
                                            String.format("\\u%04x", (int)reader.ch);
                        }
                        lexError(pos, Errors.IllegalChar(arg));
                        reader.scanChar();
                    }
                }
                break loop;
            }
        }
        endPos = reader.bp;
        // Build the token object that matches the tag of the token kind.
        switch (tk.tag) {
            case DEFAULT: return new Token(tk, pos, endPos, comments);
            case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
            case STRING: {
                // Get characters from string buffer.
                String string = reader.chars();
                // If a text block.
                if (isTextBlock && TextBlockSupport.hasSupport()) {
                    // Verify that the incidental indentation is consistent.
                    if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
                        Set<TextBlockSupport.WhitespaceChecks> checks =
                                TextBlockSupport.checkWhitespace(string);
                        if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
                            lexWarning(LintCategory.TEXT_BLOCKS, pos,
                                    Warnings.InconsistentWhiteSpaceIndentation);
                        }
                        if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
                            lexWarning(LintCategory.TEXT_BLOCKS, pos,
                                    Warnings.TrailingWhiteSpaceWillBeRemoved);
                        }
                    }
                    // Remove incidental indentation.
                    try {
                        string = TextBlockSupport.stripIndent(string);
                    } catch (Exception ex) {
                        // Error already reported, just use unstripped string.
                    }
                }
                // Translate escape sequences if present.
                if (hasEscapeSequences && TextBlockSupport.hasSupport()) {
                    try {
                        string = TextBlockSupport.translateEscapes(string);
                    } catch (Exception ex) {
                        // Error already reported, just use untranslated string.
                    }
                }
                // Build string token.
                return new StringToken(tk, pos, endPos, string, comments);
            }
            case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
            default: throw new AssertionError();
        }
    }
    finally {
        // Debug trace of the raw characters covered by the token.
        if (scannerDebug) {
                System.out.println("nextToken(" + pos
                                   + "," + endPos + ")=|" +
                                   new String(reader.getRawCharacters(pos, endPos))
                                   + "|");
        }
    }
}
1036 //where
1037 List<Comment> addComment(List<Comment> comments, Comment comment) {
1038 return comments == null ?
1039 List.of(comment) :
1040 comments.prepend(comment);
1041 }
1042
1043 /** Return the position where a lexical error occurred;
1044 */
1045 public int errPos() {
1046 return errPos;
1047 }
1048
1049 /** Set the position where a lexical error occurred;
1050 */
1051 public void errPos(int pos) {
1052 errPos = pos;
1053 }
1054
1055 /**
1056 * Called when a complete comment has been scanned. pos and endPos
1057 * will mark the comment boundary.
1058 */
1059 protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
1060 if (scannerDebug)
1061 System.out.println("processComment(" + pos
1062 + "," + endPos + "," + style + ")=|"
1063 + new String(reader.getRawCharacters(pos, endPos))
1064 + "|");
1065 char[] buf = reader.getRawCharacters(pos, endPos);
1066 return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
1067 }
1068
1069 /**
1070 * Called when a complete whitespace run has been scanned. pos and endPos
1071 * will mark the whitespace boundary.
1072 */
1073 protected void processWhiteSpace(int pos, int endPos) {
1074 if (scannerDebug)
1075 System.out.println("processWhitespace(" + pos
1076 + "," + endPos + ")=|" +
1077 new String(reader.getRawCharacters(pos, endPos))
1078 + "|");
1079 }
1080
1081 /**
1082 * Called when a line terminator has been processed.
1083 */
1084 protected void processLineTerminator(int pos, int endPos) {
1085 if (scannerDebug)
1086 System.out.println("processTerminator(" + pos
1087 + "," + endPos + ")=|" +
1088 new String(reader.getRawCharacters(pos, endPos))
1089 + "|");
1090 }
1091
1092 /** Build a map for translating between line numbers and
1093 * positions in the input.
1094 *
1095 * @return a LineMap */
1096 public Position.LineMap getLineMap() {
1097 return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
1098 }
1099
1100
1101 /**
1102 * Scan a documentation comment; determine if a deprecated tag is present.
1103 * Called once the initial /, * have been skipped, positioned at the second *
1104 * (which is treated as the beginning of the first line).
1105 * Stops positioned at the closing '/'.
1106 */
1107 protected static class BasicComment<U extends UnicodeReader> implements Comment {
1108
1109 CommentStyle cs;
1110 U comment_reader;
1111
1112 protected boolean deprecatedFlag = false;
1113 protected boolean scanned = false;
1114
1115 protected BasicComment(U comment_reader, CommentStyle cs) {
1116 this.comment_reader = comment_reader;
1117 this.cs = cs;
1118 }
1119
1120 public String getText() {
1121 return null;
1122 }
1123
1124 public int getSourcePos(int pos) {
1125 return -1;
1126 }
1127
1128 public CommentStyle getStyle() {
1129 return cs;
1130 }
1131
1132 public boolean isDeprecated() {
1133 if (!scanned && cs == CommentStyle.JAVADOC) {
1134 scanDocComment();
1135 }
1136 return deprecatedFlag;
1137 }
1138
1139 @SuppressWarnings("fallthrough")
1140 protected void scanDocComment() {
1141 try {
1142 boolean deprecatedPrefix = false;
1143
1144 comment_reader.bp += 3; // '/**'
1145 comment_reader.ch = comment_reader.buf[comment_reader.bp];
1146
1147 forEachLine:
1148 while (comment_reader.bp < comment_reader.buflen) {
1149
1150 // Skip optional WhiteSpace at beginning of line
1151 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
1152 comment_reader.scanCommentChar();
1153 }
1154
1155 // Skip optional consecutive Stars
1156 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
1157 comment_reader.scanCommentChar();
1158 if (comment_reader.ch == '/') {
1159 return;
1160 }
1161 }
1162
1163 // Skip optional WhiteSpace after Stars
1164 while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
1165 comment_reader.scanCommentChar();
1166 }
1167
1168 deprecatedPrefix = false;
1169 // At beginning of line in the JavaDoc sense.
1170 if (!deprecatedFlag) {
1171 String deprecated = "@deprecated";
1172 int i = 0;
1173 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
1174 comment_reader.scanCommentChar();
1175 i++;
1176 if (i == deprecated.length()) {
1177 deprecatedPrefix = true;
1178 break;
1179 }
1180 }
1181 }
1182
1183 if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
1184 if (Character.isWhitespace(comment_reader.ch)) {
1185 deprecatedFlag = true;
1186 } else if (comment_reader.ch == '*') {
1187 comment_reader.scanCommentChar();
1188 if (comment_reader.ch == '/') {
1189 deprecatedFlag = true;
1190 return;
1191 }
1192 }
1193 }
1194
1195 // Skip rest of line
1196 while (comment_reader.bp < comment_reader.buflen) {
1197 switch (comment_reader.ch) {
1198 case '*':
1199 comment_reader.scanCommentChar();
1200 if (comment_reader.ch == '/') {
1201 return;
1202 }
1203 break;
1204 case CR: // (Spec 3.4)
1205 comment_reader.scanCommentChar();
1206 if (comment_reader.ch != LF) {
1207 continue forEachLine;
1208 }
1209 /* fall through to LF case */
1210 case LF: // (Spec 3.4)
1211 comment_reader.scanCommentChar();
1212 continue forEachLine;
1213 default:
1214 comment_reader.scanCommentChar();
1215 }
1216 } // rest of line
1217 } // forEachLine
1218 return;
1219 } finally {
1220 scanned = true;
1221 }
1222 }
1223 }
1224 }
|
1 /*
2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.tools.javac.parser;
27
28 import com.sun.tools.javac.code.Lint;
29 import com.sun.tools.javac.code.Lint.LintCategory;
30 import com.sun.tools.javac.code.Preview;
31 import com.sun.tools.javac.code.Source;
32 import com.sun.tools.javac.code.Source.Feature;
33 import com.sun.tools.javac.file.JavacFileManager;
34 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
35 import com.sun.tools.javac.resources.CompilerProperties.Errors;
36 import com.sun.tools.javac.resources.CompilerProperties.Warnings;
37 import com.sun.tools.javac.util.*;
38 import com.sun.tools.javac.util.JCDiagnostic.*;
39
40 import java.nio.CharBuffer;
41 import java.util.Set;
42 import java.util.regex.Pattern;
43
44 import static com.sun.tools.javac.parser.Tokens.*;
45 import static com.sun.tools.javac.util.LayoutCharacters.EOI;
46
47 /**
48 * The lexical analyzer maps an input stream consisting of UTF-8 characters and unicode
49 * escape sequences into a token sequence.
50 *
51 * <p><b>This is NOT part of any supported API.
52 * If you write code that depends on this, you do so at your own risk.
53 * This code and its internal interfaces are subject to change or
54 * deletion without notice.</b>
55 */
56 public class JavaTokenizer extends UnicodeReader {
57 /**
58 * If true then prints token information after each nextToken().
59 */
60 private static final boolean scannerDebug = false;
61
62 /**
63 * Sentinal for non-value.
64 */
65 private int NOT_FOUND = -1;
66
67 /**
68 * The source language setting. Copied from scanner factory.
69 */
70 private Source source;
71
72 /**
73 * The preview language setting. Copied from scanner factory.
74 */
75 private Preview preview;
76
77 /**
78 * The log to be used for error reporting. Copied from scanner factory.
79 */
80 private final Log log;
81
82 /**
83 * The token factory. Copied from scanner factory.
84 */
85 private final Tokens tokens;
86
87 /**
88 * The names factory. Copied from scanner factory.
89 */
90 private final Names names;
91
92 /**
93 * The token kind, set by nextToken().
94 */
95 protected TokenKind tk;
96
97 /**
98 * The token's radix, set by nextToken().
99 */
100 protected int radix;
101
102 /**
103 * The token's name, set by nextToken().
104 */
105 protected Name name;
106
107 /**
108 * The position where a lexical error occurred;
109 */
110 protected int errPos = Position.NOPOS;
111
112 /**
113 * true if is a text block, set by nextToken().
114 */
115 protected boolean isTextBlock;
116
117 /**
118 * true if contains escape sequences, set by nextToken().
119 */
120 protected boolean hasEscapeSequences;
121
122 /**
123 * Buffer for building literals, used by nextToken().
124 */
125 protected StringBuilder sb;
126
127 /**
128 * Origin scanner factory.
129 */
130 protected ScannerFactory fac;
131
132 /**
133 * The set of lint options currently in effect. It is initialized
134 * from the context, and then is set/reset as needed by Attr as it
135 * visits all the various parts of the trees during attribution.
136 */
137 protected Lint lint;
138
/**
 * Construct a Java token scanner over the contents of a character buffer.
 * The buffer is converted to an array and its limit is used as the length
 * of the meaningful content.
 *
 * @param fac  the factory which created this Scanner.
 * @param cb   the input character buffer.
 */
protected JavaTokenizer(ScannerFactory fac, CharBuffer cb) {
    this(fac, JavacFileManager.toArray(cb), cb.limit());
}
148
/**
 * Construct a Java token scanner over a character array. The settings
 * and services the tokenizer needs are copied from the factory and a
 * fresh literal buffer is allocated.
 *
 * @param fac     the factory which created this Scanner
 * @param array   the input character array.
 * @param length  The length of the meaningful content in the array.
 */
protected JavaTokenizer(ScannerFactory fac, char[] array, int length) {
    super(fac, array, length);

    // Services taken from the factory.
    this.fac = fac;
    this.log = fac.log;
    this.tokens = fac.tokens;
    this.names = fac.names;

    // Language settings taken from the factory.
    this.source = fac.source;
    this.preview = fac.preview;
    this.lint = fac.lint;

    // Working storage for literals.
    this.sb = new StringBuilder(256);
}
167
168 /**
169 * Check the source level for a lexical feature.
170 *
171 * @param pos position in input buffer.
172 * @param feature feature to verify.
173 */
174 protected void checkSourceLevel(int pos, Feature feature) {
175 if (preview.isPreview(feature) && !preview.isEnabled()) {
176 //preview feature without --preview flag, error
177 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
178 } else if (!feature.allowedInSource(source)) {
179 //incompatible source level, error
180 lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
181 } else if (preview.isPreview(feature)) {
182 //use of preview feature, warn
183 preview.warnPreview(pos, feature);
184 }
185 }
186
187 /**
188 * Report an error at the given position using the provided arguments.
189 *
190 * @param pos position in input buffer.
191 * @param key error key to report.
192 */
193 protected void lexError(int pos, JCDiagnostic.Error key) {
194 log.error(pos, key);
195 tk = TokenKind.ERROR;
196 errPos = pos;
197 }
198
199 /**
200 * Report an error at the given position using the provided arguments.
201 *
202 * @param flags diagnostic flags.
203 * @param pos position in input buffer.
204 * @param key error key to report.
205 */
206 protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
207 log.error(flags, pos, key);
208 tk = TokenKind.ERROR;
209 errPos = pos;
210 }
211
212 /**
213 * Report an error at the given position using the provided arguments.
214 *
215 * @param lc lint category.
216 * @param pos position in input buffer.
217 * @param key error key to report.
218 */
219 protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
220 DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
221 log.warning(lc, dp, key);
222 }
223
224 /**
225 * Add a character to the literal buffer.
226 *
227 * @param ch character to add.
228 */
229 protected void put(char ch) {
230 sb.append(ch);
231 }
232
233 /**
234 * Add a codepoint to the literal buffer.
235 *
236 * @param codePoint codepoint to add.
237 */
238 protected void putCodePoint(int codePoint) {
239 sb.appendCodePoint(codePoint);
240 }
241
242 /**
243 * Add current character or codepoint to the literal buffer.
244 */
245 protected void put() {
246 if (isSurrogate()) {
247 putCodePoint(getCodepoint());
248 } else {
249 put(get());
250 }
251 }
252
253 /**
254 * Add a string to the literal buffer.
255 */
256 protected void put(String string) {
257 sb.append(string);
258 }
259
260 /**
261 * Add current character or codepoint to the literal buffer then return next character.
262 */
263 protected char putThenNext() {
264 put();
265
266 return next();
267 }
268
269 /**
270 * If the specified character ch matches the current character then add current character
271 * to the literal buffer and then advance.
272 *
273 * @param ch character to match.
274 *
275 * @return true if ch matches current character.
276 */
277 protected boolean acceptThenPut(char ch) {
278 if (is(ch)) {
279 put(get());
280 next();
281
282 return true;
283 }
284
285 return false;
286 }
287
288 /**
289 * If either ch1 or ch2 matches the current character then add current character
290 * to the literal buffer and then advance.
291 *
292 * @param ch1 first character to match.
293 * @param ch2 second character to match.
294 *
295 * @return true if either ch1 or ch2 matches current character.
296 */
297 protected boolean acceptOneOfThenPut(char ch1, char ch2) {
298 if (isOneOf(ch1, ch2)) {
299 put(get());
300 next();
301
302 return true;
303 }
304
305 return false;
306 }
307
308 /**
309 * Test if the current character is a line terminator.
310 *
311 * @return true if current character is a line terminator.
312 */
313 private boolean isEOLN() {
314 return isOneOf('\n', '\r');
315 }
316
317 /**
318 * Skip and process a line terminator sequence.
319 */
320 private void skipLineTerminator() {
321 int start = position();
322 accept('\r');
323 accept('\n');
324 processLineTerminator(start, position());
325 }
326
327 /**
328 * Processes the current character and places in the literal buffer. If the current
329 * character is a backslash then the next character is validated as a proper
330 * escape character. Conversion of escape sequences takes place at end of nextToken().
331 *
332 * @param pos position of the first character in literal.
333 */
334 private void scanLitChar(int pos) {
335 if (acceptThenPut('\\')) {
336 hasEscapeSequences = true;
337
338 switch (get()) {
339 case '0': case '1': case '2': case '3':
340 case '4': case '5': case '6': case '7':
341 char leadch = get();
342 putThenNext();
343
344 if (inRange('0', '7')) {
345 putThenNext();
346
347 if (leadch <= '3' && inRange('0', '7')) {
348 putThenNext();
349 }
350 }
351 break;
352
353 case 'b':
354 case 't':
355 case 'n':
356 case 'f':
357 case 'r':
358 case '\'':
359 case '\"':
360 case '\\':
361 putThenNext();
362 break;
363
364 case 's':
365 checkSourceLevel(position(), Feature.TEXT_BLOCKS);
366 putThenNext();
367 break;
368
369 case '\n':
370 case '\r':
371 if (isTextBlock) {
372 skipLineTerminator();
373 // Normalize line terminator.
374 put('\n');
375 } else {
376 lexError(position(), Errors.IllegalEscChar);
377 }
378 break;
379
380 default:
381 lexError(position(), Errors.IllegalEscChar);
382 break;
383 }
384 } else {
385 putThenNext();
386 }
387 }
388
389 /**
390 * Scan a string literal or text block.
391 *
392 * @param pos position of the first character in literal.
393 */
394 private void scanString(int pos) {
395 // Assume the best.
396 tk = Tokens.TokenKind.STRINGLITERAL;
397 // Track the end of first line for error recovery.
398 int firstEOLN = NOT_FOUND;
399 // Check for text block delimiter.
400 isTextBlock = accept("\"\"\"");
401
402 if (isTextBlock) {
403 // Check if preview feature is enabled for text blocks.
404 checkSourceLevel(pos, Feature.TEXT_BLOCKS);
405
406 // Verify the open delimiter sequence.
407 // Error if the open delimiter sequence is not """<white space>*<LineTerminator>.
408 skipWhitespace();
409
410 if (isEOLN()) {
411 skipLineTerminator();
412 } else {
413 lexError(position(), Errors.IllegalTextBlockOpen);
414 return;
415 }
416
417 // While characters are available.
418 while (!isEOF()) {
419 if (accept("\"\"\"")) {
420 return;
421 }
422
423 if (isEOLN()) {
424 skipLineTerminator();
425 // Add normalized line terminator to literal buffer.
426 put('\n');
427
428 // Record first line terminator for error recovery.
429 if (firstEOLN == NOT_FOUND) {
430 firstEOLN = position();
431 }
432 } else {
433 // Add character to string buffer.
434 scanLitChar(pos);
435 }
436 }
437 } else {
438 // Skip first quote.
439 next();
440
441 // While characters are available.
442 while (!isEOF()) {
443 if (accept('\"')) {
444 return;
445 }
446
447 if (isEOLN()) {
448 // Line terminator in string literal is an error.
449 // Fall out to unclosed string literal error.
450 break;
451 } else {
452 // Add character to string buffer.
453 scanLitChar(pos);
454 }
455 }
456 }
457
458 // String ended without close delimiter sequence.
459 lexError(pos, isTextBlock ? Errors.UnclosedTextBlock : Errors.UnclosedStrLit);
460
461 if (firstEOLN != NOT_FOUND) {
462 // Reset recovery position to point after text block open delimiter sequence.
463 reset(firstEOLN);
464 }
465 }
466
467 /**
468 * Scan sequence of digits.
469 *
470 * @param pos position of the first character in literal.
471 * @param digitRadix radix of numeric literal.
472 */
473 private void scanDigits(int pos, int digitRadix) {
474 int leadingUnderscorePos = is('_') ? position() : NOT_FOUND;
475 int trailingUnderscorePos;
476
477 do {
478 if (!is('_')) {
479 put();
480 trailingUnderscorePos = NOT_FOUND;
481 } else {
482 trailingUnderscorePos = position();
483 }
484
485 next();
486 } while (digit(pos, digitRadix) >= 0 || is('_'));
487
488 if (leadingUnderscorePos != NOT_FOUND) {
489 lexError(leadingUnderscorePos, Errors.IllegalUnderscore);
490 } else if (trailingUnderscorePos != NOT_FOUND) {
491 lexError(trailingUnderscorePos, Errors.IllegalUnderscore);
492 }
493 }
494
495 /**
496 * Read fractional part of hexadecimal floating point number.
497 *
498 * @param pos position of the first character in literal.
499 */
500 private void scanHexExponentAndSuffix(int pos) {
501 if (acceptOneOfThenPut('p', 'P')) {
502 skipIllegalUnderscores();
503 acceptOneOfThenPut('+', '-');
504 skipIllegalUnderscores();
505
506 if (digit(pos, 10) >= 0) {
507 scanDigits(pos, 10);
508 } else {
509 lexError(pos, Errors.MalformedFpLit);
510 }
511 } else {
512 lexError(pos, Errors.MalformedFpLit);
513 }
514
515 if (acceptOneOfThenPut('f', 'F')) {
516 tk = TokenKind.FLOATLITERAL;
517 radix = 16;
518 } else {
519 acceptOneOfThenPut('d', 'D');
520 tk = TokenKind.DOUBLELITERAL;
521 radix = 16;
522 }
523 }
524
525 /**
526 * Read fractional part of floating point number.
527 *
528 * @param pos position of the first character in literal.
529 */
530 private void scanFraction(int pos) {
531 skipIllegalUnderscores();
532
533 if (digit(pos, 10) >= 0) {
534 scanDigits(pos, 10);
535 }
536
537 int index = sb.length();
538
539 if (acceptOneOfThenPut('e', 'E')) {
540 skipIllegalUnderscores();
541 acceptOneOfThenPut('+', '-');
542 skipIllegalUnderscores();
543
544 if (digit(pos, 10) >= 0) {
545 scanDigits(pos, 10);
546 return;
547 }
548
549 lexError(pos, Errors.MalformedFpLit);
550 sb.setLength(index);
551 }
552 }
553
554 /**
555 * Read fractional part and 'd' or 'f' suffix of floating point number.
556 *
557 * @param pos position of the first character in literal.
558 */
559 private void scanFractionAndSuffix(int pos) {
560 radix = 10;
561 scanFraction(pos);
562
563 if (acceptOneOfThenPut('f', 'F')) {
564 tk = TokenKind.FLOATLITERAL;
565 } else {
566 acceptOneOfThenPut('d', 'D');
567 tk = TokenKind.DOUBLELITERAL;
568 }
569 }
570
571 /**
572 * Read fractional part and 'd' or 'f' suffix of hexadecimal floating point number.
573 *
574 * @param pos position of the first character in literal.
575 */
576 private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
577 radix = 16;
578 Assert.check(is('.'));
579 putThenNext();
580 skipIllegalUnderscores();
581
582 if (digit(pos, 16) >= 0) {
583 seendigit = true;
584 scanDigits(pos, 16);
585 }
586
587 if (!seendigit)
588 lexError(pos, Errors.InvalidHexNumber);
589 else
590 scanHexExponentAndSuffix(pos);
591 }
592
593 /**
594 * Skip over underscores and report as a error if found.
595 */
596 private void skipIllegalUnderscores() {
597 if (is('_')) {
598 lexError(position(), Errors.IllegalUnderscore);
599 skip('_');
600 }
601 }
602
603 /**
604 * Read a number. (Spec. 3.10)
605 *
606 * @param pos position of the first character in literal.
607 * @param radix the radix of the number; one of 2, 8, 10, 16.
608 */
609 private void scanNumber(int pos, int radix) {
610 // for octal, allow base-10 digit in case it's a float literal
611 this.radix = radix;
612 int digitRadix = (radix == 8 ? 10 : radix);
613 int firstDigit = digit(pos, Math.max(10, digitRadix));
614 boolean seendigit = firstDigit >= 0;
615 boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
616
617 if (seendigit) {
618 scanDigits(pos, digitRadix);
619 }
620
621 if (radix == 16 && is('.')) {
622 scanHexFractionAndSuffix(pos, seendigit);
623 } else if (seendigit && radix == 16 && isOneOf('p', 'P')) {
624 scanHexExponentAndSuffix(pos);
625 } else if (digitRadix == 10 && is('.')) {
626 putThenNext();
627 scanFractionAndSuffix(pos);
628 } else if (digitRadix == 10 && isOneOf('e', 'E', 'f', 'F', 'd', 'D')) {
629 scanFractionAndSuffix(pos);
630 } else {
631 if (!seenValidDigit) {
632 switch (radix) {
633 case 2:
634 lexError(pos, Errors.InvalidBinaryNumber);
635 break;
636 case 16:
637 lexError(pos, Errors.InvalidHexNumber);
638 break;
639 }
640 }
641
642 if (acceptOneOf('l', 'L')) {
643 tk = TokenKind.LONGLITERAL;
644 } else {
645 tk = TokenKind.INTLITERAL;
646 }
647 }
648 }
649
650 /**
651 * Determines if the sequence in the literal buffer is a token (keyword, operator.)
652 */
653 private void checkIdent() {
654 name = names.fromString(sb.toString());
655 tk = tokens.lookupKind(name);
656 }
657
658 /**
659 * Read an identifier. (Spec. 3.8)
660 */
661 private void scanIdent() {
662 putThenNext();
663
664 do {
665 switch (get()) {
666 case 'A': case 'B': case 'C': case 'D': case 'E':
667 case 'F': case 'G': case 'H': case 'I': case 'J':
668 case 'K': case 'L': case 'M': case 'N': case 'O':
669 case 'P': case 'Q': case 'R': case 'S': case 'T':
670 case 'U': case 'V': case 'W': case 'X': case 'Y':
671 case 'Z':
672 case 'a': case 'b': case 'c': case 'd': case 'e':
673 case 'f': case 'g': case 'h': case 'i': case 'j':
674 case 'k': case 'l': case 'm': case 'n': case 'o':
675 case 'p': case 'q': case 'r': case 's': case 't':
676 case 'u': case 'v': case 'w': case 'x': case 'y':
677 case 'z':
678 case '$': case '_':
679 case '0': case '1': case '2': case '3': case '4':
680 case '5': case '6': case '7': case '8': case '9':
681 break;
682
683 case '\u0000': case '\u0001': case '\u0002': case '\u0003':
684 case '\u0004': case '\u0005': case '\u0006': case '\u0007':
685 case '\u0008': case '\u000E': case '\u000F': case '\u0010':
686 case '\u0011': case '\u0012': case '\u0013': case '\u0014':
687 case '\u0015': case '\u0016': case '\u0017':
688 case '\u0018': case '\u0019': case '\u001B':
689 case '\u007F':
690 next();
691 continue;
692
693 case '\u001A': // EOI is also a legal identifier part
694 if (isEOF()) {
695 checkIdent();
696 return;
697 }
698
699 next();
700 continue;
701
702 default:
703 boolean isJavaIdentifierPart;
704
705 if (isASCII()) {
706 // all ASCII range chars already handled, above
707 isJavaIdentifierPart = false;
708 } else {
709 if (Character.isIdentifierIgnorable(get())) {
710 next();
711 continue;
712 }
713
714 isJavaIdentifierPart = isSurrogate()
715 ? Character.isJavaIdentifierPart(getCodepoint())
716 : Character.isJavaIdentifierPart(get());
717 }
718
719 if (!isJavaIdentifierPart) {
720 checkIdent();
721 return;
722 }
723 }
724
725 putThenNext();
726 } while (true);
727 }
728
729 /**
730 * Return true if ch can be part of an operator.
731 *
732 * @param ch character to check.
733 *
734 * @return true if ch can be part of an operator.
735 */
736 private boolean isSpecial(char ch) {
737 switch (ch) {
738 case '!': case '%': case '&': case '*': case '?':
739 case '+': case '-': case ':': case '<': case '=':
740 case '>': case '^': case '|': case '~':
741 case '@':
742 return true;
743
744 default:
745 return false;
746 }
747 }
748
749 /**
750 * Read longest possible sequence of special characters and convert to token.
751 */
752 private void scanOperator() {
753 while (true) {
754 put();
755 TokenKind newtk = tokens.lookupKind(sb.toString());
756
757 if (newtk == TokenKind.IDENTIFIER) {
758 sb.setLength(sb.length() - 1);
759 break;
760 }
761
762 tk = newtk;
763 next();
764
765 if (!isSpecial(get())) {
766 break;
767 }
768 }
769 }
770
771 /**
772 * Read token (main entrypoint.)
773 */
774 public Token readToken() {
775 sb.setLength(0);
776 name = null;
777 radix = 0;
778 isTextBlock = false;
779 hasEscapeSequences = false;
780
781 int pos;
782 List<Comment> comments = null;
783
784 try {
785 loop: while (true) {
786 pos = position();
787
788 switch (get()) {
789 case ' ': // (Spec 3.6)
790 case '\t': // (Spec 3.6)
791 case '\f': // (Spec 3.6)
792 skipWhitespace();
793 processWhiteSpace(pos, position());
794 break;
795
796 case '\n': // (Spec 3.4)
797 next();
798 processLineTerminator(pos, position());
799 break;
800
801 case '\r': // (Spec 3.4)
802 next();
803 accept('\n');
804 processLineTerminator(pos, position());
805 break;
806
807 case 'A': case 'B': case 'C': case 'D': case 'E':
808 case 'F': case 'G': case 'H': case 'I': case 'J':
809 case 'K': case 'L': case 'M': case 'N': case 'O':
810 case 'P': case 'Q': case 'R': case 'S': case 'T':
811 case 'U': case 'V': case 'W': case 'X': case 'Y':
812 case 'Z':
813 case 'a': case 'b': case 'c': case 'd': case 'e':
814 case 'f': case 'g': case 'h': case 'i': case 'j':
815 case 'k': case 'l': case 'm': case 'n': case 'o':
816 case 'p': case 'q': case 'r': case 's': case 't':
817 case 'u': case 'v': case 'w': case 'x': case 'y':
818 case 'z':
819 case '$': case '_': // (Spec. 3.8)
820 scanIdent();
821 break loop;
822
823 case '0': // (Spec. 3.10)
824 next();
825
826 if (acceptOneOf('x', 'X')) {
827 skipIllegalUnderscores();
828 scanNumber(pos, 16);
829 } else if (acceptOneOf('b', 'B')) {
830 skipIllegalUnderscores();
831 scanNumber(pos, 2);
832 } else {
833 put('0');
834
835 if (is('_')) {
836 int savePos = position();
837 skip('_');
838
839 if (digit(pos, 10) < 0) {
840 lexError(savePos, Errors.IllegalUnderscore);
841 }
842 }
843
844 scanNumber(pos, 8);
845 }
846 break loop;
847
848 case '1': case '2': case '3': case '4':
849 case '5': case '6': case '7': case '8': case '9': // (Spec. 3.10)
850 scanNumber(pos, 10);
851 break loop;
852
853 case '.': // (Spec. 3.12)
854 if (accept("...")) {
855 put("...");
856 tk = TokenKind.ELLIPSIS;
857 } else {
858 next();
859 int savePos = position();
860
861 if (accept('.')) {
862 lexError(savePos, Errors.IllegalDot);
863 } else if (digit(pos, 10) >= 0) {
864 put('.');
865 scanFractionAndSuffix(pos); // (Spec. 3.10)
866 } else {
867 tk = TokenKind.DOT;
868 }
869 }
870 break loop;
871
872 case ',': // (Spec. 3.12)
873 next();
874 tk = TokenKind.COMMA;
875 break loop;
876
877 case ';': // (Spec. 3.12)
878 next();
879 tk = TokenKind.SEMI;
880 break loop;
881
882 case '(': // (Spec. 3.12)
883 next();
884 tk = TokenKind.LPAREN;
885 break loop;
886
887 case ')': // (Spec. 3.12)
888 next();
889 tk = TokenKind.RPAREN;
890 break loop;
891
892 case '[': // (Spec. 3.12)
893 next();
894 tk = TokenKind.LBRACKET;
895 break loop;
896
897 case ']': // (Spec. 3.12)
898 next();
899 tk = TokenKind.RBRACKET;
900 break loop;
901
902 case '{': // (Spec. 3.12)
903 next();
904 tk = TokenKind.LBRACE;
905 break loop;
906
907 case '}': // (Spec. 3.12)
908 next();
909 tk = TokenKind.RBRACE;
910 break loop;
911
912 case '/':
913 next();
914
915 if (accept('/')) { // (Spec. 3.7)
916 skipToEOLN();
917
918 if (!isEOF()) {
919 comments = appendComment(comments, processComment(pos, position(), CommentStyle.LINE));
920 }
921 break;
922 } else if (accept('*')) { // (Spec. 3.7)
923 boolean isEmpty = false;
924 CommentStyle style;
925
926 if (accept('*')) {
927 style = CommentStyle.JAVADOC;
928
929 if (is('/')) {
930 isEmpty = true;
931 }
932 } else {
933 style = CommentStyle.BLOCK;
934 }
935
936 if (!isEmpty) {
937 while (!isEOF()) {
938 if (accept('*')) {
939 if (is('/')) {
940 break;
941 }
942 } else {
943 next();
944 }
945 }
946 }
947
948 if (accept('/')) {
949 comments = appendComment(comments, processComment(pos, position(), style));
950
951 break;
952 } else {
953 lexError(pos, Errors.UnclosedComment);
954
955 break loop;
956 }
957 } else if (accept('=')) {
958 tk = TokenKind.SLASHEQ; // (Spec. 3.12)
959 } else {
960 tk = TokenKind.SLASH; // (Spec. 3.12)
961 }
962 break loop;
963
964 case '\'': // (Spec. 3.10)
965 next();
966
967 if (accept('\'')) {
968 lexError(pos, Errors.EmptyCharLit);
969 } else {
970 if (isEOLN()) {
971 lexError(pos, Errors.IllegalLineEndInCharLit);
972 }
973
974 scanLitChar(pos);
975
976 if (accept('\'')) {
977 tk = TokenKind.CHARLITERAL;
978 } else {
979 lexError(pos, Errors.UnclosedCharLit);
980 }
981 }
982 break loop;
983
984 case '\"': // (Spec. 3.10)
985 scanString(pos);
986 break loop;
987
988 default:
989 if (isSpecial(get())) {
990 scanOperator();
991 } else {
992 boolean isJavaIdentifierStart;
993
994 if (isASCII()) {
995 // all ASCII range chars already handled, above
996 isJavaIdentifierStart = false;
997 } else {
998 isJavaIdentifierStart = isSurrogate()
999 ? Character.isJavaIdentifierStart(getCodepoint())
1000 : Character.isJavaIdentifierStart(get());
1001 }
1002
1003 if (isJavaIdentifierStart) {
1004 scanIdent();
1005 } else if (digit(pos, 10) >= 0) {
1006 scanNumber(pos, 10);
1007 } else if (is((char)EOI) || isEOF()) {
1008 tk = TokenKind.EOF;
1009 pos = position();
1010 } else {
1011 String arg;
1012
1013 if (isSurrogate()) {
1014 int codePoint = getCodepoint();
1015 char hi = Character.highSurrogate(codePoint);
1016 char lo = Character.lowSurrogate(codePoint);
1017 arg = String.format("\\u%04x\\u%04x", (int) hi, (int) lo);
1018 } else {
1019 char ch = get();
1020 arg = (32 < ch && ch < 127) ? String.format("%s", ch) :
1021 String.format("\\u%04x", (int) ch);
1022 }
1023
1024 lexError(pos, Errors.IllegalChar(arg));
1025 next();
1026 }
1027 }
1028 break loop;
1029 }
1030 }
1031
1032 int endPos = position();
1033
1034 if (tk.tag == Token.Tag.DEFAULT) {
1035 return new Token(tk, pos, endPos, comments);
1036 } else if (tk.tag == Token.Tag.NAMED) {
1037 return new NamedToken(tk, pos, endPos, name, comments);
1038 } else {
1039 // Get characters from string buffer.
1040 String string = sb.toString();
1041
1042 // If a text block.
1043 if (isTextBlock) {
1044 // Verify that the incidental indentation is consistent.
1045 if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
1046 Set<TextBlockSupport.WhitespaceChecks> checks =
1047 TextBlockSupport.checkWhitespace(string);
1048 if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
1049 lexWarning(LintCategory.TEXT_BLOCKS, pos,
1050 Warnings.InconsistentWhiteSpaceIndentation);
1051 }
1052 if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
1053 lexWarning(LintCategory.TEXT_BLOCKS, pos,
1054 Warnings.TrailingWhiteSpaceWillBeRemoved);
1055 }
1056 }
1057 // Remove incidental indentation.
1058 try {
1059 string = string.stripIndent();
1060 } catch (Exception ex) {
1061 // Error already reported, just use unstripped string.
1062 }
1063 }
1064
1065 // Translate escape sequences if present.
1066 if (hasEscapeSequences) {
1067 try {
1068 string = string.translateEscapes();
1069 } catch (Exception ex) {
1070 // Error already reported, just use untranslated string.
1071 }
1072 }
1073
1074 if (tk.tag == Token.Tag.STRING) {
1075 // Build string token.
1076 return new StringToken(tk, pos, endPos, string, comments);
1077 } else {
1078 // Build numeric token.
1079 return new NumericToken(tk, pos, endPos, string, radix, comments);
1080 }
1081 }
1082 } finally {
1083 int endPos = position();
1084
1085 if (scannerDebug) {
1086 System.out.println("nextToken(" + pos
1087 + "," + endPos + ")=|" +
1088 new String(getRawCharacters(pos, endPos))
1089 + "|");
1090 }
1091 }
1092 }
1093
1094 /**
1095 * Appends a comment to the list of comments preceding the current token.
1096 *
1097 * @param comments existing list of comments.
1098 * @param comment comment to append.
1099 *
1100 * @return new list with comment prepended to the existing list.
1101 */
1102 List<Comment> appendComment(List<Comment> comments, Comment comment) {
1103 return comments == null ?
1104 List.of(comment) :
1105 comments.prepend(comment);
1106 }
1107
1108 /**
1109 * Return the position where a lexical error occurred.
1110 *
1111 * @return position in the input buffer of where the error occurred.
1112 */
1113 public int errPos() {
1114 return errPos;
1115 }
1116
1117 /**
1118 * Set the position where a lexical error occurred.
1119 *
1120 * @param pos position in the input buffer of where the error occurred.
1121 */
1122 public void errPos(int pos) {
1123 errPos = pos;
1124 }
1125
1126 /**
1127 * Called when a complete comment has been scanned. pos and endPos
1128 * will mark the comment boundary.
1129 *
1130 * @param pos position of the opening / in the input buffer.
1131 * @param endPos position + 1 of the closing / in the input buffer.
1132 * @param style style of comment.
1133 *
1134 * @return the constructed BasicComment.
1135 */
1136 protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
1137 if (scannerDebug) {
1138 System.out.println("processComment(" + pos
1139 + "," + endPos + "," + style + ")=|"
1140 + new String(getRawCharacters(pos, endPos))
1141 + "|");
1142 }
1143
1144 char[] buf = getRawCharacters(pos, endPos);
1145
1146 return new BasicComment(style, fac, buf, pos);
1147 }
1148
1149 /**
1150 * Called when a complete whitespace run has been scanned. pos and endPos
1151 * will mark the whitespace boundary.
1152 *
1153 * (Spec 3.6)
1154 *
1155 * @param pos position in input buffer of first whitespace character.
1156 * @param endPos position + 1 in input buffer of last whitespace character.
1157 */
1158 protected void processWhiteSpace(int pos, int endPos) {
1159 if (scannerDebug) {
1160 System.out.println("processWhitespace(" + pos
1161 + "," + endPos + ")=|" +
1162 new String(getRawCharacters(pos, endPos))
1163 + "|");
1164 }
1165 }
1166
1167 /**
1168 * Called when a line terminator has been processed.
1169 *
1170 * @param pos position in input buffer of first character in sequence.
1171 * @param endPos position + 1 in input buffer of last character in sequence.
1172 */
1173 protected void processLineTerminator(int pos, int endPos) {
1174 if (scannerDebug) {
1175 System.out.println("processTerminator(" + pos
1176 + "," + endPos + ")=|" +
1177 new String(getRawCharacters(pos, endPos))
1178 + "|");
1179 }
1180 }
1181
1182 /**
1183 * Build a map for translating between line numbers and positions in the input.
1184 *
1185 * @return a LineMap
1186 */
1187 public Position.LineMap getLineMap() {
1188 return Position.makeLineMap(getRawCharacters(), length(), false);
1189 }
1190
1191 /**
1192 * Scan a documentation comment; determine if a deprecated tag is present.
1193 * Called once the initial /, * have been skipped, positioned at the second *
1194 * (which is treated as the beginning of the first line).
1195 * Stops positioned at the closing '/'.
1196 */
1197 protected static class BasicComment extends PositionTrackingReader implements Comment {
1198 /**
1199 * Style of comment
1200 * LINE starting with //
1201 * BLOCK starting with /*
1202 * JAVADOC starting with /**
1203 */
1204 CommentStyle cs;
1205
1206 /**
1207 * true if comment contains @deprecated at beginning of a line.
1208 */
1209 protected boolean deprecatedFlag = false;
1210
1211 /**
1212 * true if comment has been fully scanned.
1213 */
1214 protected boolean scanned = false;
1215
1216 /**
1217 * Constructor.
1218 *
1219 * @param cs comment style
1220 * @param sf Scan factory.
1221 * @param array Array containing contents of source.
1222 * @param offset Position offset in original source buffer.
1223 */
1224 protected BasicComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
1225 super(sf, array, offset);
1226 this.cs = cs;
1227 }
1228
1229 /**
1230 * Return comment body text minus comment adornments or null if not scanned.
1231 *
1232 * @return comment body text.
1233 */
1234 public String getText() {
1235 return null;
1236 }
1237
1238 /**
1239 * Return buffer position in original buffer mapped from buffer position in comment.
1240 *
1241 * @param pos buffer position in comment.
1242 *
1243 * @return buffer position in original buffer.
1244 */
1245 public int getSourcePos(int pos) {
1246 return -1;
1247 }
1248
1249 /**
1250 * Return style of comment.
1251 * LINE starting with //
1252 * BLOCK starting with /*
1253 * JAVADOC starting with /**
1254 *
1255 * @return
1256 */
1257 public CommentStyle getStyle() {
1258 return cs;
1259 }
1260
1261 /**
1262 * true if comment contains @deprecated at beginning of a line.
1263 *
1264 * @return true if comment contains @deprecated.
1265 */
1266 public boolean isDeprecated() {
1267 if (!scanned && cs == CommentStyle.JAVADOC) {
1268 scanDocComment();
1269 }
1270
1271 return deprecatedFlag;
1272 }
1273
1274 /**
1275 * Scan JAVADOC comment for details.
1276 */
1277 protected void scanDocComment() {
1278 try {
1279 boolean deprecatedPrefix = false;
1280 accept("/**");
1281
1282 forEachLine:
1283 while (!isEOF()) {
1284 // Skip optional WhiteSpace at beginning of line
1285 skipWhitespace();
1286
1287 // Skip optional consecutive Stars
1288 while (accept('*')) {
1289 if (is('/')) {
1290 return;
1291 }
1292 }
1293
1294 // Skip optional WhiteSpace after Stars
1295 skipWhitespace();
1296
1297 // At beginning of line in the JavaDoc sense.
1298 deprecatedPrefix = deprecatedFlag || accept("@deprecated");
1299
1300 if (deprecatedPrefix && !isEOF()) {
1301 if (Character.isWhitespace(get())) {
1302 deprecatedFlag = true;
1303 } else if (accept('*')) {
1304 if (is('/')) {
1305 deprecatedFlag = true;
1306 return;
1307 }
1308 }
1309 }
1310
1311 // Skip rest of line
1312 while (!isEOF()) {
1313 switch (get()) {
1314 case '*':
1315 next();
1316
1317 if (is('/')) {
1318 return;
1319 }
1320
1321 break;
1322 case '\r': // (Spec 3.4)
1323 case '\n': // (Spec 3.4)
1324 accept('\r');
1325 accept('\n');
1326 continue forEachLine;
1327
1328 default:
1329 next();
1330 break;
1331 }
1332 } // rest of line
1333 } // forEachLine
1334 return;
1335 } finally {
1336 scanned = true;
1337 }
1338 }
1339 }
1340 }
|