< prev index next >
src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
Print this page
rev 60227 : 8224225: Tokenizer improvements
Reviewed-by: jlaskey
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
@@ -28,124 +28,151 @@
import com.sun.tools.javac.code.Lint;
import com.sun.tools.javac.code.Lint.LintCategory;
import com.sun.tools.javac.code.Preview;
import com.sun.tools.javac.code.Source;
import com.sun.tools.javac.code.Source.Feature;
+import com.sun.tools.javac.file.JavacFileManager;
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
import com.sun.tools.javac.resources.CompilerProperties.Errors;
import com.sun.tools.javac.resources.CompilerProperties.Warnings;
import com.sun.tools.javac.util.*;
import com.sun.tools.javac.util.JCDiagnostic.*;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
import java.nio.CharBuffer;
-import java.util.HashSet;
import java.util.Set;
+import java.util.regex.Pattern;
import static com.sun.tools.javac.parser.Tokens.*;
-import static com.sun.tools.javac.util.LayoutCharacters.*;
+import static com.sun.tools.javac.util.LayoutCharacters.EOI;
-/** The lexical analyzer maps an input stream consisting of
- * ASCII characters and Unicode escapes into a token sequence.
+/**
+ * The lexical analyzer maps an input stream consisting of UTF-16 characters and Unicode
+ * escape sequences into a token sequence.
*
* <p><b>This is NOT part of any supported API.
* If you write code that depends on this, you do so at your own risk.
* This code and its internal interfaces are subject to change or
* deletion without notice.</b>
*/
-public class JavaTokenizer {
-
+public class JavaTokenizer extends UnicodeReader {
+ /**
+ * If true then prints token information after each nextToken().
+ */
private static final boolean scannerDebug = false;
- /** The source language setting.
+ /**
+ * Sentinel for non-value.
+ */
+ private int NOT_FOUND = -1;
+
+ /**
+ * The source language setting. Copied from scanner factory.
*/
private Source source;
- /** The preview language setting. */
+ /**
+ * The preview language setting. Copied from scanner factory.
+ */
private Preview preview;
- /** The log to be used for error reporting.
+ /**
+ * The log to be used for error reporting. Copied from scanner factory.
*/
private final Log log;
- /** The token factory. */
+ /**
+ * The token factory. Copied from scanner factory.
+ */
private final Tokens tokens;
- /** The token kind, set by nextToken().
+ /**
+ * The names factory. Copied from scanner factory.
+ */
+ private final Names names;
+
+ /**
+ * The token kind, set by nextToken().
*/
protected TokenKind tk;
- /** The token's radix, set by nextToken().
+ /**
+ * The token's radix, set by nextToken().
*/
protected int radix;
- /** The token's name, set by nextToken().
+ /**
+ * The token's name, set by nextToken().
*/
protected Name name;
- /** The position where a lexical error occurred;
+ /**
+ * The position where a lexical error occurred.
*/
protected int errPos = Position.NOPOS;
- /** The Unicode reader (low-level stream reader).
- */
- protected UnicodeReader reader;
-
- /** If is a text block
+ /**
+ * true if the token is a text block, set by nextToken().
*/
protected boolean isTextBlock;
- /** If contains escape sequences
+ /**
+ * true if the token contains escape sequences, set by nextToken().
*/
protected boolean hasEscapeSequences;
+ /**
+ * Buffer for building literals, used by nextToken().
+ */
+ protected StringBuilder sb;
+
+ /**
+ * Origin scanner factory.
+ */
protected ScannerFactory fac;
- // The set of lint options currently in effect. It is initialized
- // from the context, and then is set/reset as needed by Attr as it
- // visits all the various parts of the trees during attribution.
+ /**
+ * The set of lint options currently in effect. It is initialized
+ * from the context, and then is set/reset as needed by Attr as it
+ * visits all the various parts of the trees during attribution.
+ */
protected Lint lint;
- private static final boolean hexFloatsWork = hexFloatsWork();
- private static boolean hexFloatsWork() {
- try {
- Float.valueOf("0x1.0p1");
- return true;
- } catch (NumberFormatException ex) {
- return false;
- }
+ /**
+ * Construct a Java token scanner from the input character buffer.
+ *
+ * @param fac the factory which created this Scanner.
+ * @param cb the input character buffer.
+ */
+ protected JavaTokenizer(ScannerFactory fac, CharBuffer cb) {
+ this(fac, JavacFileManager.toArray(cb), cb.limit());
}
/**
- * Create a scanner from the input array. This method might
- * modify the array. To avoid copying the input array, ensure
- * that {@code inputLength < input.length} or
- * {@code input[input.length -1]} is a white space character.
+ * Construct a Java token scanner from the input character array.
*
* @param fac the factory which created this Scanner
- * @param buf the input, might be modified
- * Must be positive and less than or equal to input.length.
+ * @param array the input character array.
+ * @param length The length of the meaningful content in the array.
*/
- protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
- this(fac, new UnicodeReader(fac, buf));
- }
-
- protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
- this(fac, new UnicodeReader(fac, buf, inputLength));
- }
-
- protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
+ protected JavaTokenizer(ScannerFactory fac, char[] array, int length) {
+ super(fac, array, length);
this.fac = fac;
this.log = fac.log;
+ this.names = fac.names;
this.tokens = fac.tokens;
this.source = fac.source;
this.preview = fac.preview;
- this.reader = reader;
this.lint = fac.lint;
+ this.sb = new StringBuilder(256);
}
+ /**
+ * Check the source level for a lexical feature.
+ *
+ * @param pos position in input buffer.
+ * @param feature feature to verify.
+ */
protected void checkSourceLevel(int pos, Feature feature) {
if (preview.isPreview(feature) && !preview.isEnabled()) {
//preview feature without --preview flag, error
lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
} else if (!feature.allowedInSource(source)) {
@@ -155,486 +182,452 @@
//use of preview feature, warn
preview.warnPreview(pos, feature);
}
}
- /** Report an error at the given position using the provided arguments.
+ /**
+ * Report an error at the given position using the provided arguments.
+ *
+ * @param pos position in input buffer.
+ * @param key error key to report.
*/
protected void lexError(int pos, JCDiagnostic.Error key) {
log.error(pos, key);
tk = TokenKind.ERROR;
errPos = pos;
}
+ /**
+ * Report an error at the given position using the provided arguments.
+ *
+ * @param flags diagnostic flags.
+ * @param pos position in input buffer.
+ * @param key error key to report.
+ */
protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
log.error(flags, pos, key);
tk = TokenKind.ERROR;
errPos = pos;
}
+ /**
+ * Report a lint warning at the given position using the provided arguments.
+ *
+ * @param lc lint category.
+ * @param pos position in input buffer.
+ * @param key warning key to report.
+ */
protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
log.warning(lc, dp, key);
}
- /** Read next character in character or string literal and copy into sbuf.
- * pos - start of literal offset
- * translateEscapesNow - true if String::translateEscapes is not available
- * in the java.base libs. Occurs during bootstrapping.
- * multiline - true if scanning a text block. Allows newlines to be embedded
- * in the result.
- */
- private void scanLitChar(int pos, boolean translateEscapesNow, boolean multiline) {
- if (reader.ch == '\\') {
- if (reader.peekChar() == '\\' && !reader.isUnicode()) {
- reader.skipChar();
- if (!translateEscapesNow) {
- reader.putChar(false);
- }
- reader.putChar(true);
- } else {
- reader.nextChar(translateEscapesNow);
- switch (reader.ch) {
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- char leadch = reader.ch;
- int oct = reader.digit(pos, 8);
- reader.nextChar(translateEscapesNow);
- if ('0' <= reader.ch && reader.ch <= '7') {
- oct = oct * 8 + reader.digit(pos, 8);
- reader.nextChar(translateEscapesNow);
- if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
- oct = oct * 8 + reader.digit(pos, 8);
- reader.nextChar(translateEscapesNow);
- }
+ /**
+ * Add a character to the literal buffer.
+ *
+ * @param ch character to add.
+ */
+ protected void put(char ch) {
+ sb.append(ch);
}
- if (translateEscapesNow) {
- reader.putChar((char)oct);
+
+ /**
+ * Add a codepoint to the literal buffer.
+ *
+ * @param codePoint codepoint to add.
+ */
+ protected void putCodePoint(int codePoint) {
+ sb.appendCodePoint(codePoint);
}
- break;
- case 'b':
- reader.putChar(translateEscapesNow ? '\b' : 'b', true); break;
- case 't':
- reader.putChar(translateEscapesNow ? '\t' : 't', true); break;
- case 'n':
- reader.putChar(translateEscapesNow ? '\n' : 'n', true); break;
- case 'f':
- reader.putChar(translateEscapesNow ? '\f' : 'f', true); break;
- case 'r':
- reader.putChar(translateEscapesNow ? '\r' : 'r', true); break;
- case '\'':
- case '\"':
- case '\\':
- reader.putChar(true); break;
- case 's':
- checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
- reader.putChar(translateEscapesNow ? ' ' : 's', true); break;
- case '\n':
- case '\r':
- if (!multiline) {
- lexError(reader.bp, Errors.IllegalEscChar);
+
+ /**
+ * Add current character or codepoint to the literal buffer.
+ */
+ protected void put() {
+ if (isSurrogate()) {
+ putCodePoint(getCodepoint());
} else {
- checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
- int start = reader.bp;
- if (reader.ch == '\r' && reader.peekChar() == '\n') {
- reader.nextChar(translateEscapesNow);
- }
- reader.nextChar(translateEscapesNow);
- processLineTerminator(start, reader.bp);
- }
- break;
- default:
- lexError(reader.bp, Errors.IllegalEscChar);
- }
- }
- } else if (reader.bp != reader.buflen) {
- reader.putChar(true);
+ put(get());
}
}
- /** Interim access to String methods used to support text blocks.
- * Required to handle bootstrapping with pre-text block jdks.
- * Should be replaced with direct calls in the 'next' jdk.
- */
- static class TextBlockSupport {
- /** Reflection method to remove incidental indentation.
+ /**
+ * Add a string to the literal buffer.
*/
- private static final Method stripIndent;
+ protected void put(String string) {
+ sb.append(string);
+ }
- /** Reflection method to translate escape sequences.
+ /**
+ * Add current character or codepoint to the literal buffer then return next character.
*/
- private static final Method translateEscapes;
+ protected char putThenNext() {
+ put();
- /** true if stripIndent and translateEscapes are available in the bootstrap jdk.
- */
- private static final boolean hasSupport;
+ return next();
+ }
- /** Get a string method via refection or null if not available.
+ /**
+ * If the specified character ch matches the current character then add current character
+ * to the literal buffer and then advance.
+ *
+ * @param ch character to match.
+ *
+ * @return true if ch matches current character.
*/
- private static Method getStringMethodOrNull(String name) {
- try {
- return String.class.getMethod(name);
- } catch (Exception ex) {
- // Method not available, return null.
- }
- return null;
- }
+ protected boolean acceptThenPut(char ch) {
+ if (is(ch)) {
+ put(get());
+ next();
- static {
- // Get text block string methods.
- stripIndent = getStringMethodOrNull("stripIndent");
- translateEscapes = getStringMethodOrNull("translateEscapes");
- // true if stripIndent and translateEscapes are available in the bootstrap jdk.
- hasSupport = stripIndent != null && translateEscapes != null;
- }
-
- /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
- */
- static boolean hasSupport() {
- return hasSupport;
- }
-
- /** Return the leading whitespace count (indentation) of the line.
- */
- private static int indent(String line) {
- return line.length() - line.stripLeading().length();
- }
-
- enum WhitespaceChecks {
- INCONSISTENT,
- TRAILING
- };
-
- /** Check that the use of white space in content is not problematic.
- */
- static Set<WhitespaceChecks> checkWhitespace(String string) {
- // Start with empty result set.
- Set<WhitespaceChecks> checks = new HashSet<>();
- // No need to check empty strings.
- if (string.isEmpty()) {
- return checks;
- }
- // Maximum common indentation.
- int outdent = 0;
- // No need to check indentation if opting out (last line is empty.)
- char lastChar = string.charAt(string.length() - 1);
- boolean optOut = lastChar == '\n' || lastChar == '\r';
- // Split string based at line terminators.
- String[] lines = string.split("\\R");
- int length = lines.length;
- // Extract last line.
- String lastLine = length == 0 ? "" : lines[length - 1];
- if (!optOut) {
- // Prime with the last line indentation (may be blank.)
- outdent = indent(lastLine);
- for (String line : lines) {
- // Blanks lines have no influence (last line accounted for.)
- if (!line.isBlank()) {
- outdent = Integer.min(outdent, indent(line));
- if (outdent == 0) {
- break;
- }
- }
- }
- }
- // Last line is representative.
- String start = lastLine.substring(0, outdent);
- for (String line : lines) {
- // Fail if a line does not have the same indentation.
- if (!line.isBlank() && !line.startsWith(start)) {
- // Mix of different white space
- checks.add(WhitespaceChecks.INCONSISTENT);
- }
- // Line has content even after indent is removed.
- if (outdent < line.length()) {
- // Is the last character a white space.
- lastChar = line.charAt(line.length() - 1);
- if (Character.isWhitespace(lastChar)) {
- // Has trailing white space.
- checks.add(WhitespaceChecks.TRAILING);
- }
- }
- }
- return checks;
+ return true;
}
- /** Invoke String::stripIndent through reflection.
- */
- static String stripIndent(String string) {
- try {
- string = (String)stripIndent.invoke(string);
- } catch (InvocationTargetException | IllegalAccessException ex) {
- throw new RuntimeException(ex);
- }
- return string;
+ return false;
}
- /** Invoke String::translateEscapes through reflection.
+ /**
+ * If either ch1 or ch2 matches the current character then add current character
+ * to the literal buffer and then advance.
+ *
+ * @param ch1 first character to match.
+ * @param ch2 second character to match.
+ *
+ * @return true if either ch1 or ch2 matches current character.
*/
- static String translateEscapes(String string) {
- try {
- string = (String)translateEscapes.invoke(string);
- } catch (InvocationTargetException | IllegalAccessException ex) {
- throw new RuntimeException(ex);
- }
- return string;
+ protected boolean acceptOneOfThenPut(char ch1, char ch2) {
+ if (isOneOf(ch1, ch2)) {
+ put(get());
+ next();
+
+ return true;
}
+
+ return false;
}
- /** Test for EOLN.
+ /**
+ * Test if the current character is a line terminator.
+ *
+ * @return true if current character is a line terminator.
*/
private boolean isEOLN() {
- return reader.ch == LF || reader.ch == CR;
+ return isOneOf('\n', '\r');
}
- /** Test for CRLF.
+ /**
+ * Skip and process a line terminator sequence.
*/
- private boolean isCRLF() {
- return reader.ch == CR && reader.peekChar() == LF;
+ private void skipLineTerminator() {
+ int start = position();
+ accept('\r');
+ accept('\n');
+ processLineTerminator(start, position());
}
- /** Count and skip repeated occurrences of the specified character.
+ /**
+ * Processes the current character and places in the literal buffer. If the current
+ * character is a backslash then the next character is validated as a proper
+ * escape character. Conversion of escape sequences takes place at end of nextToken().
+ *
+ * @param pos position of the first character in literal.
*/
- private int countChar(char ch, int max) {
- int count = 0;
- for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
- reader.scanChar();
+ private void scanLitChar(int pos) {
+ if (acceptThenPut('\\')) {
+ hasEscapeSequences = true;
+
+ switch (get()) {
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ char leadch = get();
+ putThenNext();
+
+ if (inRange('0', '7')) {
+ putThenNext();
+
+ if (leadch <= '3' && inRange('0', '7')) {
+ putThenNext();
}
- return count;
}
+ break;
- /** Skip and process a line terminator.
- */
- private void skipLineTerminator() {
- int start = reader.bp;
- if (isCRLF()) {
- reader.scanChar();
+ case 'b':
+ case 't':
+ case 'n':
+ case 'f':
+ case 'r':
+ case '\'':
+ case '\"':
+ case '\\':
+ putThenNext();
+ break;
+
+ case 's':
+ checkSourceLevel(position(), Feature.TEXT_BLOCKS);
+ putThenNext();
+ break;
+
+ case '\n':
+ case '\r':
+ if (isTextBlock) {
+ skipLineTerminator();
+ // Normalize line terminator.
+ put('\n');
+ } else {
+ lexError(position(), Errors.IllegalEscChar);
+ }
+ break;
+
+ default:
+ lexError(position(), Errors.IllegalEscChar);
+ break;
+ }
+ } else {
+ putThenNext();
}
- reader.scanChar();
- processLineTerminator(start, reader.bp);
}
- /** Scan a string literal or text block.
+ /**
+ * Scan a string literal or text block.
+ *
+ * @param pos position of the first character in literal.
*/
private void scanString(int pos) {
- // Clear flags.
- isTextBlock = false;
- hasEscapeSequences = false;
- // Track the end of first line for error recovery.
- int firstEOLN = -1;
- // Attempt to scan for up to 3 double quotes.
- int openCount = countChar('\"', 3);
- switch (openCount) {
- case 1: // Starting a string literal.
- break;
- case 2: // Starting an empty string literal.
+ // Assume the best.
tk = Tokens.TokenKind.STRINGLITERAL;
- return;
- case 3: // Starting a text block.
+ // Track the end of first line for error recovery.
+ int firstEOLN = NOT_FOUND;
+ // Check for text block delimiter.
+ isTextBlock = accept("\"\"\"");
+
+ if (isTextBlock) {
// Check if preview feature is enabled for text blocks.
checkSourceLevel(pos, Feature.TEXT_BLOCKS);
- isTextBlock = true;
+
// Verify the open delimiter sequence.
- while (reader.bp < reader.buflen) {
- char ch = reader.ch;
- if (ch != ' ' && ch != '\t' && ch != FF) {
- break;
+ // Error if the open delimiter sequence is not """<white space>*<LineTerminator>.
+ skipWhitespace();
+
+ if (isEOLN()) {
+ skipLineTerminator();
+ } else {
+ lexError(position(), Errors.IllegalTextBlockOpen);
+ return;
}
- reader.scanChar();
+
+ // While characters are available.
+ while (!isEOF()) {
+ if (accept("\"\"\"")) {
+ return;
}
+
if (isEOLN()) {
skipLineTerminator();
+ // Add normalized line terminator to literal buffer.
+ put('\n');
+
+ // Record first line terminator for error recovery.
+ if (firstEOLN == NOT_FOUND) {
+ firstEOLN = position();
+ }
} else {
- // Error if the open delimiter sequence is not
- // """<white space>*<LineTerminator>.
- lexError(reader.bp, Errors.IllegalTextBlockOpen);
- return;
+ // Add character to string buffer.
+ scanLitChar(pos);
}
- break;
}
+ } else {
+ // Skip first quote.
+ next();
+
// While characters are available.
- while (reader.bp < reader.buflen) {
- // If possible close delimiter sequence.
- if (reader.ch == '\"') {
- // Check to see if enough double quotes are present.
- int closeCount = countChar('\"', openCount);
- if (openCount == closeCount) {
- // Good result.
- tk = Tokens.TokenKind.STRINGLITERAL;
+ while (!isEOF()) {
+ if (accept('\"')) {
return;
}
- // False alarm, add double quotes to string buffer.
- reader.repeat('\"', closeCount);
- } else if (isEOLN()) {
+
+ if (isEOLN()) {
// Line terminator in string literal is an error.
// Fall out to unclosed string literal error.
- if (openCount == 1) {
break;
- }
- skipLineTerminator();
- // Add line terminator to string buffer.
- reader.putChar('\n', false);
- // Record first line terminator for error recovery.
- if (firstEOLN == -1) {
- firstEOLN = reader.bp;
- }
- } else if (reader.ch == '\\') {
- // Handle escape sequences.
- hasEscapeSequences = true;
- // Translate escapes immediately if TextBlockSupport is not available
- // during bootstrapping.
- boolean translateEscapesNow = !TextBlockSupport.hasSupport();
- scanLitChar(pos, translateEscapesNow, openCount != 1);
} else {
// Add character to string buffer.
- reader.putChar(true);
+ scanLitChar(pos);
+ }
}
}
+
// String ended without close delimiter sequence.
- lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
- if (firstEOLN != -1) {
- // Reset recovery position to point after open delimiter sequence.
- reader.reset(firstEOLN);
+ lexError(pos, isTextBlock ? Errors.UnclosedTextBlock : Errors.UnclosedStrLit);
+
+ if (firstEOLN != NOT_FOUND) {
+ // Reset recovery position to point after text block open delimiter sequence.
+ reset(firstEOLN);
}
}
+ /**
+ * Scan sequence of digits.
+ *
+ * @param pos position of the first character in literal.
+ * @param digitRadix radix of numeric literal.
+ */
private void scanDigits(int pos, int digitRadix) {
- char saveCh;
- int savePos;
+ int leadingUnderscorePos = is('_') ? position() : NOT_FOUND;
+ int trailingUnderscorePos;
+
do {
- if (reader.ch != '_') {
- reader.putChar(false);
+ if (!is('_')) {
+ put();
+ trailingUnderscorePos = NOT_FOUND;
+ } else {
+ trailingUnderscorePos = position();
+ }
+
+ next();
+ } while (digit(pos, digitRadix) >= 0 || is('_'));
+
+ if (leadingUnderscorePos != NOT_FOUND) {
+ lexError(leadingUnderscorePos, Errors.IllegalUnderscore);
+ } else if (trailingUnderscorePos != NOT_FOUND) {
+ lexError(trailingUnderscorePos, Errors.IllegalUnderscore);
}
- saveCh = reader.ch;
- savePos = reader.bp;
- reader.scanChar();
- } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
- if (saveCh == '_')
- lexError(savePos, Errors.IllegalUnderscore);
}
- /** Read fractional part of hexadecimal floating point number.
+ /**
+ * Read exponent and 'd' or 'f' suffix of hexadecimal floating point number.
+ *
+ * @param pos position of the first character in literal.
*/
private void scanHexExponentAndSuffix(int pos) {
- if (reader.ch == 'p' || reader.ch == 'P') {
- reader.putChar(true);
+ if (acceptOneOfThenPut('p', 'P')) {
skipIllegalUnderscores();
- if (reader.ch == '+' || reader.ch == '-') {
- reader.putChar(true);
- }
+ acceptOneOfThenPut('+', '-');
skipIllegalUnderscores();
- if (reader.digit(pos, 10) >= 0) {
+
+ if (digit(pos, 10) >= 0) {
scanDigits(pos, 10);
- if (!hexFloatsWork)
- lexError(pos, Errors.UnsupportedCrossFpLit);
- } else
+ } else {
lexError(pos, Errors.MalformedFpLit);
+ }
} else {
lexError(pos, Errors.MalformedFpLit);
}
- if (reader.ch == 'f' || reader.ch == 'F') {
- reader.putChar(true);
+
+ if (acceptOneOfThenPut('f', 'F')) {
tk = TokenKind.FLOATLITERAL;
radix = 16;
} else {
- if (reader.ch == 'd' || reader.ch == 'D') {
- reader.putChar(true);
- }
+ acceptOneOfThenPut('d', 'D');
tk = TokenKind.DOUBLELITERAL;
radix = 16;
}
}
- /** Read fractional part of floating point number.
+ /**
+ * Read fractional part of floating point number.
+ *
+ * @param pos position of the first character in literal.
*/
private void scanFraction(int pos) {
skipIllegalUnderscores();
- if (reader.digit(pos, 10) >= 0) {
+
+ if (digit(pos, 10) >= 0) {
scanDigits(pos, 10);
}
- int sp1 = reader.sp;
- if (reader.ch == 'e' || reader.ch == 'E') {
- reader.putChar(true);
+
+ int index = sb.length();
+
+ if (acceptOneOfThenPut('e', 'E')) {
skipIllegalUnderscores();
- if (reader.ch == '+' || reader.ch == '-') {
- reader.putChar(true);
- }
+ acceptOneOfThenPut('+', '-');
skipIllegalUnderscores();
- if (reader.digit(pos, 10) >= 0) {
+
+ if (digit(pos, 10) >= 0) {
scanDigits(pos, 10);
return;
}
+
lexError(pos, Errors.MalformedFpLit);
- reader.sp = sp1;
+ sb.setLength(index);
}
}
- /** Read fractional part and 'd' or 'f' suffix of floating point number.
+ /**
+ * Read fractional part and 'd' or 'f' suffix of floating point number.
+ *
+ * @param pos position of the first character in literal.
*/
private void scanFractionAndSuffix(int pos) {
radix = 10;
scanFraction(pos);
- if (reader.ch == 'f' || reader.ch == 'F') {
- reader.putChar(true);
+
+ if (acceptOneOfThenPut('f', 'F')) {
tk = TokenKind.FLOATLITERAL;
} else {
- if (reader.ch == 'd' || reader.ch == 'D') {
- reader.putChar(true);
- }
+ acceptOneOfThenPut('d', 'D');
tk = TokenKind.DOUBLELITERAL;
}
}
- /** Read fractional part and 'd' or 'f' suffix of floating point number.
+ /**
+ * Read fractional part and 'd' or 'f' suffix of hexadecimal floating point number.
+ *
+ * @param pos position of the first character in literal.
*/
private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
radix = 16;
- Assert.check(reader.ch == '.');
- reader.putChar(true);
+ Assert.check(is('.'));
+ putThenNext();
skipIllegalUnderscores();
- if (reader.digit(pos, 16) >= 0) {
+
+ if (digit(pos, 16) >= 0) {
seendigit = true;
scanDigits(pos, 16);
}
+
if (!seendigit)
lexError(pos, Errors.InvalidHexNumber);
else
scanHexExponentAndSuffix(pos);
}
+ /**
+ * Skip over underscores and report an error if found.
+ */
private void skipIllegalUnderscores() {
- if (reader.ch == '_') {
- lexError(reader.bp, Errors.IllegalUnderscore);
- while (reader.ch == '_')
- reader.scanChar();
+ if (is('_')) {
+ lexError(position(), Errors.IllegalUnderscore);
+ skip('_');
}
}
- /** Read a number.
- * @param radix The radix of the number; one of 2, 8, 10, 16.
+ /**
+ * Read a number. (Spec. 3.10)
+ *
+ * @param pos position of the first character in literal.
+ * @param radix the radix of the number; one of 2, 8, 10, 16.
*/
private void scanNumber(int pos, int radix) {
// for octal, allow base-10 digit in case it's a float literal
this.radix = radix;
int digitRadix = (radix == 8 ? 10 : radix);
- int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
+ int firstDigit = digit(pos, Math.max(10, digitRadix));
boolean seendigit = firstDigit >= 0;
boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
+
if (seendigit) {
scanDigits(pos, digitRadix);
}
- if (radix == 16 && reader.ch == '.') {
+
+ if (radix == 16 && is('.')) {
scanHexFractionAndSuffix(pos, seendigit);
- } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
+ } else if (seendigit && radix == 16 && isOneOf('p', 'P')) {
scanHexExponentAndSuffix(pos);
- } else if (digitRadix == 10 && reader.ch == '.') {
- reader.putChar(true);
+ } else if (digitRadix == 10 && is('.')) {
+ putThenNext();
scanFractionAndSuffix(pos);
- } else if (digitRadix == 10 &&
- (reader.ch == 'e' || reader.ch == 'E' ||
- reader.ch == 'f' || reader.ch == 'F' ||
- reader.ch == 'd' || reader.ch == 'D')) {
+ } else if (digitRadix == 10 && isOneOf('e', 'E', 'f', 'F', 'd', 'D')) {
scanFractionAndSuffix(pos);
} else {
if (!seenValidDigit) {
switch (radix) {
case 2:
@@ -643,27 +636,35 @@
case 16:
lexError(pos, Errors.InvalidHexNumber);
break;
}
}
- if (reader.ch == 'l' || reader.ch == 'L') {
- reader.scanChar();
+
+ if (acceptOneOf('l', 'L')) {
tk = TokenKind.LONGLITERAL;
} else {
tk = TokenKind.INTLITERAL;
}
}
}
- /** Read an identifier.
+ /**
+ * Determines if the sequence in the literal buffer is a token (keyword, operator.)
+ */
+ private void checkIdent() {
+ name = names.fromString(sb.toString());
+ tk = tokens.lookupKind(name);
+ }
+
+ /**
+ * Read an identifier. (Spec. 3.8)
*/
private void scanIdent() {
- boolean isJavaIdentifierPart;
- char high;
- reader.putChar(true);
+ putThenNext();
+
do {
- switch (reader.ch) {
+ switch (get()) {
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
@@ -676,123 +677,135 @@
case 'z':
case '$': case '_':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
break;
+
case '\u0000': case '\u0001': case '\u0002': case '\u0003':
case '\u0004': case '\u0005': case '\u0006': case '\u0007':
case '\u0008': case '\u000E': case '\u000F': case '\u0010':
case '\u0011': case '\u0012': case '\u0013': case '\u0014':
case '\u0015': case '\u0016': case '\u0017':
case '\u0018': case '\u0019': case '\u001B':
case '\u007F':
- reader.scanChar();
+ next();
continue;
+
case '\u001A': // EOI is also a legal identifier part
- if (reader.bp >= reader.buflen) {
- name = reader.name();
- tk = tokens.lookupKind(name);
+ if (isEOF()) {
+ checkIdent();
return;
}
- reader.scanChar();
+
+ next();
continue;
+
default:
- if (reader.ch < '\u0080') {
+ boolean isJavaIdentifierPart;
+
+ if (isASCII()) {
// all ASCII range chars already handled, above
isJavaIdentifierPart = false;
} else {
- if (Character.isIdentifierIgnorable(reader.ch)) {
- reader.scanChar();
+ if (Character.isIdentifierIgnorable(get())) {
+ next();
continue;
- } else {
- int codePoint = reader.peekSurrogates();
- if (codePoint >= 0) {
- if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
- reader.putChar(true);
- }
- } else {
- isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
- }
}
+
+ isJavaIdentifierPart = isSurrogate()
+ ? Character.isJavaIdentifierPart(getCodepoint())
+ : Character.isJavaIdentifierPart(get());
}
+
if (!isJavaIdentifierPart) {
- name = reader.name();
- tk = tokens.lookupKind(name);
+ checkIdent();
return;
}
}
- reader.putChar(true);
+
+ putThenNext();
} while (true);
}
- /** Return true if reader.ch can be part of an operator.
+ /**
+ * Return true if ch can be part of an operator.
+ *
+ * @param ch character to check.
+ *
+ * @return true if ch can be part of an operator.
*/
private boolean isSpecial(char ch) {
switch (ch) {
case '!': case '%': case '&': case '*': case '?':
case '+': case '-': case ':': case '<': case '=':
case '>': case '^': case '|': case '~':
case '@':
return true;
+
default:
return false;
}
}
- /** Read longest possible sequence of special characters and convert
- * to token.
+ /**
+ * Read longest possible sequence of special characters and convert to token.
*/
private void scanOperator() {
while (true) {
- reader.putChar(false);
- Name newname = reader.name();
- TokenKind tk1 = tokens.lookupKind(newname);
- if (tk1 == TokenKind.IDENTIFIER) {
- reader.sp--;
+ put();
+ TokenKind newtk = tokens.lookupKind(sb.toString());
+
+ if (newtk == TokenKind.IDENTIFIER) {
+ sb.setLength(sb.length() - 1);
+ break;
+ }
+
+ tk = newtk;
+ next();
+
+ if (!isSpecial(get())) {
break;
}
- tk = tk1;
- reader.scanChar();
- if (!isSpecial(reader.ch)) break;
}
}
- /** Read token.
+ /**
+ * Read token (main entrypoint.)
*/
public Token readToken() {
-
- reader.sp = 0;
+ sb.setLength(0);
name = null;
radix = 0;
+ isTextBlock = false;
+ hasEscapeSequences = false;
- int pos = 0;
- int endPos = 0;
+ int pos;
List<Comment> comments = null;
try {
loop: while (true) {
- pos = reader.bp;
- switch (reader.ch) {
+ pos = position();
+
+ switch (get()) {
case ' ': // (Spec 3.6)
case '\t': // (Spec 3.6)
- case FF: // (Spec 3.6)
- do {
- reader.scanChar();
- } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
- processWhiteSpace(pos, reader.bp);
+ case '\f': // (Spec 3.6)
+ skipWhitespace();
+ processWhiteSpace(pos, position());
break;
- case LF: // (Spec 3.4)
- reader.scanChar();
- processLineTerminator(pos, reader.bp);
+
+ case '\n': // (Spec 3.4)
+ next();
+ processLineTerminator(pos, position());
break;
- case CR: // (Spec 3.4)
- reader.scanChar();
- if (reader.ch == LF) {
- reader.scanChar();
- }
- processLineTerminator(pos, reader.bp);
+
+ case '\r': // (Spec 3.4)
+ next();
+ accept('\n');
+ processLineTerminator(pos, position());
break;
+
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
@@ -801,196 +814,235 @@
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
- case '$': case '_':
+ case '$': case '_': // (Spec. 3.8)
scanIdent();
break loop;
- case '0':
- reader.scanChar();
- if (reader.ch == 'x' || reader.ch == 'X') {
- reader.scanChar();
+
+ case '0': // (Spec. 3.10)
+ next();
+
+ if (acceptOneOf('x', 'X')) {
skipIllegalUnderscores();
scanNumber(pos, 16);
- } else if (reader.ch == 'b' || reader.ch == 'B') {
- reader.scanChar();
+ } else if (acceptOneOf('b', 'B')) {
skipIllegalUnderscores();
scanNumber(pos, 2);
} else {
- reader.putChar('0');
- if (reader.ch == '_') {
- int savePos = reader.bp;
- do {
- reader.scanChar();
- } while (reader.ch == '_');
- if (reader.digit(pos, 10) < 0) {
+ put('0');
+
+ if (is('_')) {
+ int savePos = position();
+ skip('_');
+
+ if (digit(pos, 10) < 0) {
lexError(savePos, Errors.IllegalUnderscore);
}
}
+
scanNumber(pos, 8);
}
break loop;
+
case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
+ case '5': case '6': case '7': case '8': case '9': // (Spec. 3.10)
scanNumber(pos, 10);
break loop;
- case '.':
- reader.scanChar();
- if (reader.digit(pos, 10) >= 0) {
- reader.putChar('.');
- scanFractionAndSuffix(pos);
- } else if (reader.ch == '.') {
- int savePos = reader.bp;
- reader.putChar('.'); reader.putChar('.', true);
- if (reader.ch == '.') {
- reader.scanChar();
- reader.putChar('.');
+
+ case '.': // (Spec. 3.12)
+ if (accept("...")) {
+ put("...");
tk = TokenKind.ELLIPSIS;
} else {
+ next();
+ int savePos = position();
+
+ if (accept('.')) {
lexError(savePos, Errors.IllegalDot);
- }
+ } else if (digit(pos, 10) >= 0) {
+ put('.');
+ scanFractionAndSuffix(pos); // (Spec. 3.10)
} else {
tk = TokenKind.DOT;
}
+ }
+ break loop;
+
+ case ',': // (Spec. 3.12)
+ next();
+ tk = TokenKind.COMMA;
+ break loop;
+
+ case ';': // (Spec. 3.12)
+ next();
+ tk = TokenKind.SEMI;
break loop;
- case ',':
- reader.scanChar(); tk = TokenKind.COMMA; break loop;
- case ';':
- reader.scanChar(); tk = TokenKind.SEMI; break loop;
- case '(':
- reader.scanChar(); tk = TokenKind.LPAREN; break loop;
- case ')':
- reader.scanChar(); tk = TokenKind.RPAREN; break loop;
- case '[':
- reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
- case ']':
- reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
- case '{':
- reader.scanChar(); tk = TokenKind.LBRACE; break loop;
- case '}':
- reader.scanChar(); tk = TokenKind.RBRACE; break loop;
+
+ case '(': // (Spec. 3.12)
+ next();
+ tk = TokenKind.LPAREN;
+ break loop;
+
+ case ')': // (Spec. 3.12)
+ next();
+ tk = TokenKind.RPAREN;
+ break loop;
+
+ case '[': // (Spec. 3.12)
+ next();
+ tk = TokenKind.LBRACKET;
+ break loop;
+
+ case ']': // (Spec. 3.12)
+ next();
+ tk = TokenKind.RBRACKET;
+ break loop;
+
+ case '{': // (Spec. 3.12)
+ next();
+ tk = TokenKind.LBRACE;
+ break loop;
+
+ case '}': // (Spec. 3.12)
+ next();
+ tk = TokenKind.RBRACE;
+ break loop;
+
case '/':
- reader.scanChar();
- if (reader.ch == '/') {
- do {
- reader.scanCommentChar();
- } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
- if (reader.bp < reader.buflen) {
- comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
+ next();
+
+ if (accept('/')) { // (Spec. 3.7)
+ skipToEOLN();
+
+ if (!isEOF()) {
+ comments = appendComment(comments, processComment(pos, position(), CommentStyle.LINE));
}
break;
- } else if (reader.ch == '*') {
+ } else if (accept('*')) { // (Spec. 3.7)
boolean isEmpty = false;
- reader.scanChar();
CommentStyle style;
- if (reader.ch == '*') {
+
+ if (accept('*')) {
style = CommentStyle.JAVADOC;
- reader.scanCommentChar();
- if (reader.ch == '/') {
+
+ if (is('/')) {
isEmpty = true;
}
} else {
style = CommentStyle.BLOCK;
}
- while (!isEmpty && reader.bp < reader.buflen) {
- if (reader.ch == '*') {
- reader.scanChar();
- if (reader.ch == '/') break;
+
+ if (!isEmpty) {
+ while (!isEOF()) {
+ if (accept('*')) {
+ if (is('/')) {
+ break;
+ }
} else {
- reader.scanCommentChar();
+ next();
+ }
}
}
- if (reader.ch == '/') {
- reader.scanChar();
- comments = addComment(comments, processComment(pos, reader.bp, style));
+
+ if (accept('/')) {
+ comments = appendComment(comments, processComment(pos, position(), style));
+
break;
} else {
lexError(pos, Errors.UnclosedComment);
+
break loop;
}
- } else if (reader.ch == '=') {
- tk = TokenKind.SLASHEQ;
- reader.scanChar();
+ } else if (accept('=')) {
+ tk = TokenKind.SLASHEQ; // (Spec. 3.12)
} else {
- tk = TokenKind.SLASH;
+ tk = TokenKind.SLASH; // (Spec. 3.12)
}
break loop;
- case '\'':
- reader.scanChar();
- if (reader.ch == '\'') {
+
+ case '\'': // (Spec. 3.10)
+ next();
+
+ if (accept('\'')) {
lexError(pos, Errors.EmptyCharLit);
- reader.scanChar();
} else {
- if (isEOLN())
+ if (isEOLN()) {
lexError(pos, Errors.IllegalLineEndInCharLit);
- scanLitChar(pos, true, false);
- if (reader.ch == '\'') {
- reader.scanChar();
+ }
+
+ scanLitChar(pos);
+
+ if (accept('\'')) {
tk = TokenKind.CHARLITERAL;
} else {
lexError(pos, Errors.UnclosedCharLit);
}
}
break loop;
- case '\"':
+
+ case '\"': // (Spec. 3.10)
scanString(pos);
break loop;
+
default:
- if (isSpecial(reader.ch)) {
+ if (isSpecial(get())) {
scanOperator();
} else {
boolean isJavaIdentifierStart;
- int codePoint = -1;
- if (reader.ch < '\u0080') {
+
+ if (isASCII()) {
// all ASCII range chars already handled, above
isJavaIdentifierStart = false;
} else {
- codePoint = reader.peekSurrogates();
- if (codePoint >= 0) {
- if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
- reader.putChar(true);
- }
- } else {
- isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
- }
+ isJavaIdentifierStart = isSurrogate()
+ ? Character.isJavaIdentifierStart(getCodepoint())
+ : Character.isJavaIdentifierStart(get());
}
+
if (isJavaIdentifierStart) {
scanIdent();
- } else if (reader.digit(pos, 10) >= 0) {
+ } else if (digit(pos, 10) >= 0) {
scanNumber(pos, 10);
- } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
+ } else if (is((char)EOI) || isEOF()) {
tk = TokenKind.EOF;
- pos = reader.realLength;
+ pos = position();
} else {
String arg;
- if (codePoint >= 0) {
- char high = reader.ch;
- reader.scanChar();
- arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
- } else {
- arg = (32 < reader.ch && reader.ch < 127) ?
- String.format("%s", reader.ch) :
- String.format("\\u%04x", (int)reader.ch);
+ if (isSurrogate()) {
+ int codePoint = getCodepoint();
+ char hi = Character.highSurrogate(codePoint);
+ char lo = Character.lowSurrogate(codePoint);
+ arg = String.format("\\u%04x\\u%04x", (int) hi, (int) lo);
+ } else {
+ char ch = get();
+ arg = (32 < ch && ch < 127) ? String.format("%s", ch) :
+ String.format("\\u%04x", (int) ch);
}
+
lexError(pos, Errors.IllegalChar(arg));
- reader.scanChar();
+ next();
}
}
break loop;
}
}
- endPos = reader.bp;
- switch (tk.tag) {
- case DEFAULT: return new Token(tk, pos, endPos, comments);
- case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
- case STRING: {
+
+ int endPos = position();
+
+ if (tk.tag == Token.Tag.DEFAULT) {
+ return new Token(tk, pos, endPos, comments);
+ } else if (tk.tag == Token.Tag.NAMED) {
+ return new NamedToken(tk, pos, endPos, name, comments);
+ } else {
// Get characters from string buffer.
- String string = reader.chars();
+ String string = sb.toString();
+
// If a text block.
- if (isTextBlock && TextBlockSupport.hasSupport()) {
+ if (isTextBlock) {
// Verify that the incidental indentation is consistent.
if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
Set<TextBlockSupport.WhitespaceChecks> checks =
TextBlockSupport.checkWhitespace(string);
if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
@@ -1002,218 +1054,282 @@
Warnings.TrailingWhiteSpaceWillBeRemoved);
}
}
// Remove incidental indentation.
try {
- string = TextBlockSupport.stripIndent(string);
+ string = string.stripIndent();
} catch (Exception ex) {
// Error already reported, just use unstripped string.
}
}
+
// Translate escape sequences if present.
- if (hasEscapeSequences && TextBlockSupport.hasSupport()) {
+ if (hasEscapeSequences) {
try {
- string = TextBlockSupport.translateEscapes(string);
+ string = string.translateEscapes();
} catch (Exception ex) {
// Error already reported, just use untranslated string.
}
}
+
+ if (tk.tag == Token.Tag.STRING) {
// Build string token.
return new StringToken(tk, pos, endPos, string, comments);
- }
- case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
- default: throw new AssertionError();
+ } else {
+ // Build numeric token.
+ return new NumericToken(tk, pos, endPos, string, radix, comments);
}
}
- finally {
+ } finally {
+ int endPos = position();
+
if (scannerDebug) {
System.out.println("nextToken(" + pos
+ "," + endPos + ")=|" +
- new String(reader.getRawCharacters(pos, endPos))
+ new String(getRawCharacters(pos, endPos))
+ "|");
}
}
}
- //where
- List<Comment> addComment(List<Comment> comments, Comment comment) {
+
+ /**
+ * Adds a comment to the list of comments preceding the current token.
+ * The comment is prepended, so the most recently added comment comes first.
+ *
+ * @param comments existing list of comments.
+ * @param comment comment to append.
+ *
+ * @return new list with comment prepended to the existing list.
+ */
+ List<Comment> appendComment(List<Comment> comments, Comment comment) {
return comments == null ?
List.of(comment) :
comments.prepend(comment);
}
- /** Return the position where a lexical error occurred;
+ /**
+ * Return the position where a lexical error occurred.
+ *
+ * @return position in the input buffer of where the error occurred.
*/
public int errPos() {
return errPos;
}
- /** Set the position where a lexical error occurred;
+ /**
+ * Set the position where a lexical error occurred.
+ *
+ * @param pos position in the input buffer of where the error occurred.
*/
public void errPos(int pos) {
errPos = pos;
}
/**
* Called when a complete comment has been scanned. pos and endPos
* will mark the comment boundary.
+ *
+ * @param pos position of the opening / in the input buffer.
+ * @param endPos position + 1 of the closing / in the input buffer.
+ * @param style style of comment.
+ *
+ * @return the constructed BasicComment.
*/
protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
- if (scannerDebug)
+ if (scannerDebug) {
System.out.println("processComment(" + pos
+ "," + endPos + "," + style + ")=|"
- + new String(reader.getRawCharacters(pos, endPos))
+ + new String(getRawCharacters(pos, endPos))
+ "|");
- char[] buf = reader.getRawCharacters(pos, endPos);
- return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
+ }
+
+ char[] buf = getRawCharacters(pos, endPos);
+
+ return new BasicComment(style, fac, buf, pos);
}
/**
* Called when a complete whitespace run has been scanned. pos and endPos
* will mark the whitespace boundary.
+ *
+ * (Spec 3.6)
+ *
+ * @param pos position in input buffer of first whitespace character.
+ * @param endPos position + 1 in input buffer of last whitespace character.
*/
protected void processWhiteSpace(int pos, int endPos) {
- if (scannerDebug)
+ if (scannerDebug) {
System.out.println("processWhitespace(" + pos
+ "," + endPos + ")=|" +
- new String(reader.getRawCharacters(pos, endPos))
+ new String(getRawCharacters(pos, endPos))
+ "|");
}
+ }
/**
* Called when a line terminator has been processed.
+ *
+ * @param pos position in input buffer of first character in sequence.
+ * @param endPos position + 1 in input buffer of last character in sequence.
*/
protected void processLineTerminator(int pos, int endPos) {
- if (scannerDebug)
+ if (scannerDebug) {
System.out.println("processTerminator(" + pos
+ "," + endPos + ")=|" +
- new String(reader.getRawCharacters(pos, endPos))
+ new String(getRawCharacters(pos, endPos))
+ "|");
}
+ }
- /** Build a map for translating between line numbers and
- * positions in the input.
+ /**
+ * Build a map for translating between line numbers and positions in the input.
*
- * @return a LineMap */
+ * @return a LineMap
+ */
public Position.LineMap getLineMap() {
- return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
+ return Position.makeLineMap(getRawCharacters(), length(), false);
}
-
/**
* Scan a documentation comment; determine if a deprecated tag is present.
* Called once the initial /, * have been skipped, positioned at the second *
* (which is treated as the beginning of the first line).
* Stops positioned at the closing '/'.
*/
- protected static class BasicComment<U extends UnicodeReader> implements Comment {
-
+ protected static class BasicComment extends PositionTrackingReader implements Comment {
+ /**
+ * Style of comment
+ * LINE starting with //
+ * BLOCK starting with /*
+ * JAVADOC starting with /**
+ */
CommentStyle cs;
- U comment_reader;
+ /**
+ * true if comment contains @deprecated at beginning of a line.
+ */
protected boolean deprecatedFlag = false;
+
+ /**
+ * true if comment has been fully scanned.
+ */
protected boolean scanned = false;
- protected BasicComment(U comment_reader, CommentStyle cs) {
- this.comment_reader = comment_reader;
+ /**
+ * Constructor.
+ *
+ * @param cs comment style
+ * @param sf Scanner factory.
+ * @param array Array containing contents of source.
+ * @param offset Position offset in original source buffer.
+ */
+ protected BasicComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
+ super(sf, array, offset);
this.cs = cs;
}
+ /**
+ * Return comment body text minus comment adornments or null if not scanned.
+ *
+ * @return comment body text.
+ */
public String getText() {
return null;
}
+ /**
+ * Return buffer position in original buffer mapped from buffer position in comment.
+ *
+ * @param pos buffer position in comment.
+ *
+ * @return buffer position in original buffer.
+ */
public int getSourcePos(int pos) {
return -1;
}
+ /**
+ * Return style of comment.
+ * LINE starting with //
+ * BLOCK starting with /*
+ * JAVADOC starting with /**
+ *
+ * @return style of comment.
+ */
public CommentStyle getStyle() {
return cs;
}
+ /**
+ * true if comment contains @deprecated at beginning of a line.
+ *
+ * @return true if comment contains @deprecated.
+ */
public boolean isDeprecated() {
if (!scanned && cs == CommentStyle.JAVADOC) {
scanDocComment();
}
+
return deprecatedFlag;
}
- @SuppressWarnings("fallthrough")
+ /**
+ * Scan JAVADOC comment for details.
+ */
protected void scanDocComment() {
try {
boolean deprecatedPrefix = false;
-
- comment_reader.bp += 3; // '/**'
- comment_reader.ch = comment_reader.buf[comment_reader.bp];
+ accept("/**");
forEachLine:
- while (comment_reader.bp < comment_reader.buflen) {
-
+ while (!isEOF()) {
// Skip optional WhiteSpace at beginning of line
- while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
- comment_reader.scanCommentChar();
- }
+ skipWhitespace();
// Skip optional consecutive Stars
- while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
- comment_reader.scanCommentChar();
- if (comment_reader.ch == '/') {
+ while (accept('*')) {
+ if (is('/')) {
return;
}
}
// Skip optional WhiteSpace after Stars
- while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
- comment_reader.scanCommentChar();
- }
+ skipWhitespace();
- deprecatedPrefix = false;
// At beginning of line in the JavaDoc sense.
- if (!deprecatedFlag) {
- String deprecated = "@deprecated";
- int i = 0;
- while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
- comment_reader.scanCommentChar();
- i++;
- if (i == deprecated.length()) {
- deprecatedPrefix = true;
- break;
- }
- }
- }
+ deprecatedPrefix = deprecatedFlag || accept("@deprecated");
- if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
- if (Character.isWhitespace(comment_reader.ch)) {
+ if (deprecatedPrefix && !isEOF()) {
+ if (Character.isWhitespace(get())) {
deprecatedFlag = true;
- } else if (comment_reader.ch == '*') {
- comment_reader.scanCommentChar();
- if (comment_reader.ch == '/') {
+ } else if (accept('*')) {
+ if (is('/')) {
deprecatedFlag = true;
return;
}
}
}
// Skip rest of line
- while (comment_reader.bp < comment_reader.buflen) {
- switch (comment_reader.ch) {
+ while (!isEOF()) {
+ switch (get()) {
case '*':
- comment_reader.scanCommentChar();
- if (comment_reader.ch == '/') {
+ next();
+
+ if (is('/')) {
return;
}
+
break;
- case CR: // (Spec 3.4)
- comment_reader.scanCommentChar();
- if (comment_reader.ch != LF) {
- continue forEachLine;
- }
- /* fall through to LF case */
- case LF: // (Spec 3.4)
- comment_reader.scanCommentChar();
+ case '\r': // (Spec 3.4)
+ case '\n': // (Spec 3.4)
+ accept('\r');
+ accept('\n');
continue forEachLine;
+
default:
- comment_reader.scanCommentChar();
+ next();
+ break;
}
} // rest of line
} // forEachLine
return;
} finally {
< prev index next >