< prev index next >
src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
Print this page
rev 60227 : 8224225: Tokenizer improvements
Reviewed-by: jlaskey
*** 1,7 ****
/*
! * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
--- 1,7 ----
/*
! * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
*** 28,151 ****
import com.sun.tools.javac.code.Lint;
import com.sun.tools.javac.code.Lint.LintCategory;
import com.sun.tools.javac.code.Preview;
import com.sun.tools.javac.code.Source;
import com.sun.tools.javac.code.Source.Feature;
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
import com.sun.tools.javac.resources.CompilerProperties.Errors;
import com.sun.tools.javac.resources.CompilerProperties.Warnings;
import com.sun.tools.javac.util.*;
import com.sun.tools.javac.util.JCDiagnostic.*;
- import java.lang.reflect.InvocationTargetException;
- import java.lang.reflect.Method;
import java.nio.CharBuffer;
- import java.util.HashSet;
import java.util.Set;
import static com.sun.tools.javac.parser.Tokens.*;
! import static com.sun.tools.javac.util.LayoutCharacters.*;
! /** The lexical analyzer maps an input stream consisting of
! * ASCII characters and Unicode escapes into a token sequence.
*
* <p><b>This is NOT part of any supported API.
* If you write code that depends on this, you do so at your own risk.
* This code and its internal interfaces are subject to change or
* deletion without notice.</b>
*/
! public class JavaTokenizer {
!
private static final boolean scannerDebug = false;
! /** The source language setting.
*/
private Source source;
! /** The preview language setting. */
private Preview preview;
! /** The log to be used for error reporting.
*/
private final Log log;
! /** The token factory. */
private final Tokens tokens;
! /** The token kind, set by nextToken().
*/
protected TokenKind tk;
! /** The token's radix, set by nextToken().
*/
protected int radix;
! /** The token's name, set by nextToken().
*/
protected Name name;
! /** The position where a lexical error occurred;
*/
protected int errPos = Position.NOPOS;
! /** The Unicode reader (low-level stream reader).
! */
! protected UnicodeReader reader;
!
! /** If is a text block
*/
protected boolean isTextBlock;
! /** If contains escape sequences
*/
protected boolean hasEscapeSequences;
protected ScannerFactory fac;
! // The set of lint options currently in effect. It is initialized
! // from the context, and then is set/reset as needed by Attr as it
! // visits all the various parts of the trees during attribution.
protected Lint lint;
! private static final boolean hexFloatsWork = hexFloatsWork();
! private static boolean hexFloatsWork() {
! try {
! Float.valueOf("0x1.0p1");
! return true;
! } catch (NumberFormatException ex) {
! return false;
! }
}
/**
! * Create a scanner from the input array. This method might
! * modify the array. To avoid copying the input array, ensure
! * that {@code inputLength < input.length} or
! * {@code input[input.length -1]} is a white space character.
*
* @param fac the factory which created this Scanner
! * @param buf the input, might be modified
! * Must be positive and less than or equal to input.length.
*/
! protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
! this(fac, new UnicodeReader(fac, buf));
! }
!
! protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
! this(fac, new UnicodeReader(fac, buf, inputLength));
! }
!
! protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
this.fac = fac;
this.log = fac.log;
this.tokens = fac.tokens;
this.source = fac.source;
this.preview = fac.preview;
- this.reader = reader;
this.lint = fac.lint;
}
protected void checkSourceLevel(int pos, Feature feature) {
if (preview.isPreview(feature) && !preview.isEnabled()) {
//preview feature without --preview flag, error
lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
} else if (!feature.allowedInSource(source)) {
--- 28,178 ----
import com.sun.tools.javac.code.Lint;
import com.sun.tools.javac.code.Lint.LintCategory;
import com.sun.tools.javac.code.Preview;
import com.sun.tools.javac.code.Source;
import com.sun.tools.javac.code.Source.Feature;
+ import com.sun.tools.javac.file.JavacFileManager;
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
import com.sun.tools.javac.resources.CompilerProperties.Errors;
import com.sun.tools.javac.resources.CompilerProperties.Warnings;
import com.sun.tools.javac.util.*;
import com.sun.tools.javac.util.JCDiagnostic.*;
import java.nio.CharBuffer;
import java.util.Set;
+ import java.util.regex.Pattern;
import static com.sun.tools.javac.parser.Tokens.*;
! import static com.sun.tools.javac.util.LayoutCharacters.EOI;
! /**
! * The lexical analyzer maps an input stream consisting of UTF-8 characters and unicode
! * escape sequences into a token sequence.
*
* <p><b>This is NOT part of any supported API.
* If you write code that depends on this, you do so at your own risk.
* This code and its internal interfaces are subject to change or
* deletion without notice.</b>
*/
! public class JavaTokenizer extends UnicodeReader {
! /**
! * If true then prints token information after each nextToken().
! */
private static final boolean scannerDebug = false;
! /**
! * Sentinal for non-value.
! */
! private int NOT_FOUND = -1;
!
! /**
! * The source language setting. Copied from scanner factory.
*/
private Source source;
! /**
! * The preview language setting. Copied from scanner factory.
! */
private Preview preview;
! /**
! * The log to be used for error reporting. Copied from scanner factory.
*/
private final Log log;
! /**
! * The token factory. Copied from scanner factory.
! */
private final Tokens tokens;
! /**
! * The names factory. Copied from scanner factory.
! */
! private final Names names;
!
! /**
! * The token kind, set by nextToken().
*/
protected TokenKind tk;
! /**
! * The token's radix, set by nextToken().
*/
protected int radix;
! /**
! * The token's name, set by nextToken().
*/
protected Name name;
! /**
! * The position where a lexical error occurred;
*/
protected int errPos = Position.NOPOS;
! /**
! * true if is a text block, set by nextToken().
*/
protected boolean isTextBlock;
! /**
! * true if contains escape sequences, set by nextToken().
*/
protected boolean hasEscapeSequences;
+ /**
+ * Buffer for building literals, used by nextToken().
+ */
+ protected StringBuilder sb;
+
+ /**
+ * Origin scanner factory.
+ */
protected ScannerFactory fac;
! /**
! * The set of lint options currently in effect. It is initialized
! * from the context, and then is set/reset as needed by Attr as it
! * visits all the various parts of the trees during attribution.
! */
protected Lint lint;
! /**
! * Construct a Java token scanner from the input character buffer.
! *
! * @param fac the factory which created this Scanner.
! * @param cb the input character buffer.
! */
! protected JavaTokenizer(ScannerFactory fac, CharBuffer cb) {
! this(fac, JavacFileManager.toArray(cb), cb.limit());
}
/**
! * Construct a Java token scanner from the input character array.
*
* @param fac the factory which created this Scanner
! * @param array the input character array.
! * @param length The length of the meaningful content in the array.
*/
! protected JavaTokenizer(ScannerFactory fac, char[] array, int length) {
! super(fac, array, length);
this.fac = fac;
this.log = fac.log;
+ this.names = fac.names;
this.tokens = fac.tokens;
this.source = fac.source;
this.preview = fac.preview;
this.lint = fac.lint;
+ this.sb = new StringBuilder(256);
}
+ /**
+ * Check the source level for a lexical feature.
+ *
+ * @param pos position in input buffer.
+ * @param feature feature to verify.
+ */
protected void checkSourceLevel(int pos, Feature feature) {
if (preview.isPreview(feature) && !preview.isEnabled()) {
//preview feature without --preview flag, error
lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
} else if (!feature.allowedInSource(source)) {
*** 155,640 ****
//use of preview feature, warn
preview.warnPreview(pos, feature);
}
}
! /** Report an error at the given position using the provided arguments.
*/
protected void lexError(int pos, JCDiagnostic.Error key) {
log.error(pos, key);
tk = TokenKind.ERROR;
errPos = pos;
}
protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
log.error(flags, pos, key);
tk = TokenKind.ERROR;
errPos = pos;
}
protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
log.warning(lc, dp, key);
}
! /** Read next character in character or string literal and copy into sbuf.
! * pos - start of literal offset
! * translateEscapesNow - true if String::translateEscapes is not available
! * in the java.base libs. Occurs during bootstrapping.
! * multiline - true if scanning a text block. Allows newlines to be embedded
! * in the result.
! */
! private void scanLitChar(int pos, boolean translateEscapesNow, boolean multiline) {
! if (reader.ch == '\\') {
! if (reader.peekChar() == '\\' && !reader.isUnicode()) {
! reader.skipChar();
! if (!translateEscapesNow) {
! reader.putChar(false);
! }
! reader.putChar(true);
! } else {
! reader.nextChar(translateEscapesNow);
! switch (reader.ch) {
! case '0': case '1': case '2': case '3':
! case '4': case '5': case '6': case '7':
! char leadch = reader.ch;
! int oct = reader.digit(pos, 8);
! reader.nextChar(translateEscapesNow);
! if ('0' <= reader.ch && reader.ch <= '7') {
! oct = oct * 8 + reader.digit(pos, 8);
! reader.nextChar(translateEscapesNow);
! if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
! oct = oct * 8 + reader.digit(pos, 8);
! reader.nextChar(translateEscapesNow);
! }
}
! if (translateEscapesNow) {
! reader.putChar((char)oct);
}
! break;
! case 'b':
! reader.putChar(translateEscapesNow ? '\b' : 'b', true); break;
! case 't':
! reader.putChar(translateEscapesNow ? '\t' : 't', true); break;
! case 'n':
! reader.putChar(translateEscapesNow ? '\n' : 'n', true); break;
! case 'f':
! reader.putChar(translateEscapesNow ? '\f' : 'f', true); break;
! case 'r':
! reader.putChar(translateEscapesNow ? '\r' : 'r', true); break;
! case '\'':
! case '\"':
! case '\\':
! reader.putChar(true); break;
! case 's':
! checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
! reader.putChar(translateEscapesNow ? ' ' : 's', true); break;
! case '\n':
! case '\r':
! if (!multiline) {
! lexError(reader.bp, Errors.IllegalEscChar);
} else {
! checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
! int start = reader.bp;
! if (reader.ch == '\r' && reader.peekChar() == '\n') {
! reader.nextChar(translateEscapesNow);
! }
! reader.nextChar(translateEscapesNow);
! processLineTerminator(start, reader.bp);
! }
! break;
! default:
! lexError(reader.bp, Errors.IllegalEscChar);
! }
! }
! } else if (reader.bp != reader.buflen) {
! reader.putChar(true);
}
}
! /** Interim access to String methods used to support text blocks.
! * Required to handle bootstrapping with pre-text block jdks.
! * Should be replaced with direct calls in the 'next' jdk.
! */
! static class TextBlockSupport {
! /** Reflection method to remove incidental indentation.
*/
! private static final Method stripIndent;
! /** Reflection method to translate escape sequences.
*/
! private static final Method translateEscapes;
! /** true if stripIndent and translateEscapes are available in the bootstrap jdk.
! */
! private static final boolean hasSupport;
! /** Get a string method via refection or null if not available.
*/
! private static Method getStringMethodOrNull(String name) {
! try {
! return String.class.getMethod(name);
! } catch (Exception ex) {
! // Method not available, return null.
! }
! return null;
! }
! static {
! // Get text block string methods.
! stripIndent = getStringMethodOrNull("stripIndent");
! translateEscapes = getStringMethodOrNull("translateEscapes");
! // true if stripIndent and translateEscapes are available in the bootstrap jdk.
! hasSupport = stripIndent != null && translateEscapes != null;
! }
!
! /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
! */
! static boolean hasSupport() {
! return hasSupport;
! }
!
! /** Return the leading whitespace count (indentation) of the line.
! */
! private static int indent(String line) {
! return line.length() - line.stripLeading().length();
! }
!
! enum WhitespaceChecks {
! INCONSISTENT,
! TRAILING
! };
!
! /** Check that the use of white space in content is not problematic.
! */
! static Set<WhitespaceChecks> checkWhitespace(String string) {
! // Start with empty result set.
! Set<WhitespaceChecks> checks = new HashSet<>();
! // No need to check empty strings.
! if (string.isEmpty()) {
! return checks;
! }
! // Maximum common indentation.
! int outdent = 0;
! // No need to check indentation if opting out (last line is empty.)
! char lastChar = string.charAt(string.length() - 1);
! boolean optOut = lastChar == '\n' || lastChar == '\r';
! // Split string based at line terminators.
! String[] lines = string.split("\\R");
! int length = lines.length;
! // Extract last line.
! String lastLine = length == 0 ? "" : lines[length - 1];
! if (!optOut) {
! // Prime with the last line indentation (may be blank.)
! outdent = indent(lastLine);
! for (String line : lines) {
! // Blanks lines have no influence (last line accounted for.)
! if (!line.isBlank()) {
! outdent = Integer.min(outdent, indent(line));
! if (outdent == 0) {
! break;
! }
! }
! }
! }
! // Last line is representative.
! String start = lastLine.substring(0, outdent);
! for (String line : lines) {
! // Fail if a line does not have the same indentation.
! if (!line.isBlank() && !line.startsWith(start)) {
! // Mix of different white space
! checks.add(WhitespaceChecks.INCONSISTENT);
! }
! // Line has content even after indent is removed.
! if (outdent < line.length()) {
! // Is the last character a white space.
! lastChar = line.charAt(line.length() - 1);
! if (Character.isWhitespace(lastChar)) {
! // Has trailing white space.
! checks.add(WhitespaceChecks.TRAILING);
! }
! }
! }
! return checks;
}
! /** Invoke String::stripIndent through reflection.
! */
! static String stripIndent(String string) {
! try {
! string = (String)stripIndent.invoke(string);
! } catch (InvocationTargetException | IllegalAccessException ex) {
! throw new RuntimeException(ex);
! }
! return string;
}
! /** Invoke String::translateEscapes through reflection.
*/
! static String translateEscapes(String string) {
! try {
! string = (String)translateEscapes.invoke(string);
! } catch (InvocationTargetException | IllegalAccessException ex) {
! throw new RuntimeException(ex);
! }
! return string;
}
}
! /** Test for EOLN.
*/
private boolean isEOLN() {
! return reader.ch == LF || reader.ch == CR;
}
! /** Test for CRLF.
*/
! private boolean isCRLF() {
! return reader.ch == CR && reader.peekChar() == LF;
}
! /** Count and skip repeated occurrences of the specified character.
*/
! private int countChar(char ch, int max) {
! int count = 0;
! for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
! reader.scanChar();
}
- return count;
}
! /** Skip and process a line terminator.
! */
! private void skipLineTerminator() {
! int start = reader.bp;
! if (isCRLF()) {
! reader.scanChar();
}
- reader.scanChar();
- processLineTerminator(start, reader.bp);
}
! /** Scan a string literal or text block.
*/
private void scanString(int pos) {
! // Clear flags.
! isTextBlock = false;
! hasEscapeSequences = false;
! // Track the end of first line for error recovery.
! int firstEOLN = -1;
! // Attempt to scan for up to 3 double quotes.
! int openCount = countChar('\"', 3);
! switch (openCount) {
! case 1: // Starting a string literal.
! break;
! case 2: // Starting an empty string literal.
tk = Tokens.TokenKind.STRINGLITERAL;
! return;
! case 3: // Starting a text block.
// Check if preview feature is enabled for text blocks.
checkSourceLevel(pos, Feature.TEXT_BLOCKS);
! isTextBlock = true;
// Verify the open delimiter sequence.
! while (reader.bp < reader.buflen) {
! char ch = reader.ch;
! if (ch != ' ' && ch != '\t' && ch != FF) {
! break;
}
! reader.scanChar();
}
if (isEOLN()) {
skipLineTerminator();
} else {
! // Error if the open delimiter sequence is not
! // """<white space>*<LineTerminator>.
! lexError(reader.bp, Errors.IllegalTextBlockOpen);
! return;
}
- break;
}
// While characters are available.
! while (reader.bp < reader.buflen) {
! // If possible close delimiter sequence.
! if (reader.ch == '\"') {
! // Check to see if enough double quotes are present.
! int closeCount = countChar('\"', openCount);
! if (openCount == closeCount) {
! // Good result.
! tk = Tokens.TokenKind.STRINGLITERAL;
return;
}
! // False alarm, add double quotes to string buffer.
! reader.repeat('\"', closeCount);
! } else if (isEOLN()) {
// Line terminator in string literal is an error.
// Fall out to unclosed string literal error.
- if (openCount == 1) {
break;
- }
- skipLineTerminator();
- // Add line terminator to string buffer.
- reader.putChar('\n', false);
- // Record first line terminator for error recovery.
- if (firstEOLN == -1) {
- firstEOLN = reader.bp;
- }
- } else if (reader.ch == '\\') {
- // Handle escape sequences.
- hasEscapeSequences = true;
- // Translate escapes immediately if TextBlockSupport is not available
- // during bootstrapping.
- boolean translateEscapesNow = !TextBlockSupport.hasSupport();
- scanLitChar(pos, translateEscapesNow, openCount != 1);
} else {
// Add character to string buffer.
! reader.putChar(true);
}
}
// String ended without close delimiter sequence.
! lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
! if (firstEOLN != -1) {
! // Reset recovery position to point after open delimiter sequence.
! reader.reset(firstEOLN);
}
}
private void scanDigits(int pos, int digitRadix) {
! char saveCh;
! int savePos;
do {
! if (reader.ch != '_') {
! reader.putChar(false);
}
- saveCh = reader.ch;
- savePos = reader.bp;
- reader.scanChar();
- } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
- if (saveCh == '_')
- lexError(savePos, Errors.IllegalUnderscore);
}
! /** Read fractional part of hexadecimal floating point number.
*/
private void scanHexExponentAndSuffix(int pos) {
! if (reader.ch == 'p' || reader.ch == 'P') {
! reader.putChar(true);
skipIllegalUnderscores();
! if (reader.ch == '+' || reader.ch == '-') {
! reader.putChar(true);
! }
skipIllegalUnderscores();
! if (reader.digit(pos, 10) >= 0) {
scanDigits(pos, 10);
! if (!hexFloatsWork)
! lexError(pos, Errors.UnsupportedCrossFpLit);
! } else
lexError(pos, Errors.MalformedFpLit);
} else {
lexError(pos, Errors.MalformedFpLit);
}
! if (reader.ch == 'f' || reader.ch == 'F') {
! reader.putChar(true);
tk = TokenKind.FLOATLITERAL;
radix = 16;
} else {
! if (reader.ch == 'd' || reader.ch == 'D') {
! reader.putChar(true);
! }
tk = TokenKind.DOUBLELITERAL;
radix = 16;
}
}
! /** Read fractional part of floating point number.
*/
private void scanFraction(int pos) {
skipIllegalUnderscores();
! if (reader.digit(pos, 10) >= 0) {
scanDigits(pos, 10);
}
! int sp1 = reader.sp;
! if (reader.ch == 'e' || reader.ch == 'E') {
! reader.putChar(true);
skipIllegalUnderscores();
! if (reader.ch == '+' || reader.ch == '-') {
! reader.putChar(true);
! }
skipIllegalUnderscores();
! if (reader.digit(pos, 10) >= 0) {
scanDigits(pos, 10);
return;
}
lexError(pos, Errors.MalformedFpLit);
! reader.sp = sp1;
}
}
! /** Read fractional part and 'd' or 'f' suffix of floating point number.
*/
private void scanFractionAndSuffix(int pos) {
radix = 10;
scanFraction(pos);
! if (reader.ch == 'f' || reader.ch == 'F') {
! reader.putChar(true);
tk = TokenKind.FLOATLITERAL;
} else {
! if (reader.ch == 'd' || reader.ch == 'D') {
! reader.putChar(true);
! }
tk = TokenKind.DOUBLELITERAL;
}
}
! /** Read fractional part and 'd' or 'f' suffix of floating point number.
*/
private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
radix = 16;
! Assert.check(reader.ch == '.');
! reader.putChar(true);
skipIllegalUnderscores();
! if (reader.digit(pos, 16) >= 0) {
seendigit = true;
scanDigits(pos, 16);
}
if (!seendigit)
lexError(pos, Errors.InvalidHexNumber);
else
scanHexExponentAndSuffix(pos);
}
private void skipIllegalUnderscores() {
! if (reader.ch == '_') {
! lexError(reader.bp, Errors.IllegalUnderscore);
! while (reader.ch == '_')
! reader.scanChar();
}
}
! /** Read a number.
! * @param radix The radix of the number; one of 2, 8, 10, 16.
*/
private void scanNumber(int pos, int radix) {
// for octal, allow base-10 digit in case it's a float literal
this.radix = radix;
int digitRadix = (radix == 8 ? 10 : radix);
! int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
boolean seendigit = firstDigit >= 0;
boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
if (seendigit) {
scanDigits(pos, digitRadix);
}
! if (radix == 16 && reader.ch == '.') {
scanHexFractionAndSuffix(pos, seendigit);
! } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
scanHexExponentAndSuffix(pos);
! } else if (digitRadix == 10 && reader.ch == '.') {
! reader.putChar(true);
scanFractionAndSuffix(pos);
! } else if (digitRadix == 10 &&
! (reader.ch == 'e' || reader.ch == 'E' ||
! reader.ch == 'f' || reader.ch == 'F' ||
! reader.ch == 'd' || reader.ch == 'D')) {
scanFractionAndSuffix(pos);
} else {
if (!seenValidDigit) {
switch (radix) {
case 2:
--- 182,633 ----
//use of preview feature, warn
preview.warnPreview(pos, feature);
}
}
! /**
! * Report an error at the given position using the provided arguments.
! *
! * @param pos position in input buffer.
! * @param key error key to report.
*/
protected void lexError(int pos, JCDiagnostic.Error key) {
log.error(pos, key);
tk = TokenKind.ERROR;
errPos = pos;
}
+ /**
+ * Report an error at the given position using the provided arguments.
+ *
+ * @param flags diagnostic flags.
+ * @param pos position in input buffer.
+ * @param key error key to report.
+ */
protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
log.error(flags, pos, key);
tk = TokenKind.ERROR;
errPos = pos;
}
+ /**
+ * Report an error at the given position using the provided arguments.
+ *
+ * @param lc lint category.
+ * @param pos position in input buffer.
+ * @param key error key to report.
+ */
protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
log.warning(lc, dp, key);
}
! /**
! * Add a character to the literal buffer.
! *
! * @param ch character to add.
! */
! protected void put(char ch) {
! sb.append(ch);
}
!
! /**
! * Add a codepoint to the literal buffer.
! *
! * @param codePoint codepoint to add.
! */
! protected void putCodePoint(int codePoint) {
! sb.appendCodePoint(codePoint);
}
!
! /**
! * Add current character or codepoint to the literal buffer.
! */
! protected void put() {
! if (isSurrogate()) {
! putCodePoint(getCodepoint());
} else {
! put(get());
}
}
! /**
! * Add a string to the literal buffer.
*/
! protected void put(String string) {
! sb.append(string);
! }
! /**
! * Add current character or codepoint to the literal buffer then return next character.
*/
! protected char putThenNext() {
! put();
! return next();
! }
! /**
! * If the specified character ch matches the current character then add current character
! * to the literal buffer and then advance.
! *
! * @param ch character to match.
! *
! * @return true if ch matches current character.
*/
! protected boolean acceptThenPut(char ch) {
! if (is(ch)) {
! put(get());
! next();
! return true;
}
! return false;
}
! /**
! * If either ch1 or ch2 matches the current character then add current character
! * to the literal buffer and then advance.
! *
! * @param ch1 first character to match.
! * @param ch2 second character to match.
! *
! * @return true if either ch1 or ch2 matches current character.
*/
! protected boolean acceptOneOfThenPut(char ch1, char ch2) {
! if (isOneOf(ch1, ch2)) {
! put(get());
! next();
!
! return true;
}
+
+ return false;
}
! /**
! * Test if the current character is a line terminator.
! *
! * @return true if current character is a line terminator.
*/
private boolean isEOLN() {
! return isOneOf('\n', '\r');
}
! /**
! * Skip and process a line terminator sequence.
*/
! private void skipLineTerminator() {
! int start = position();
! accept('\r');
! accept('\n');
! processLineTerminator(start, position());
}
! /**
! * Processes the current character and places in the literal buffer. If the current
! * character is a backslash then the next character is validated as a proper
! * escape character. Conversion of escape sequences takes place at end of nextToken().
! *
! * @param pos position of the first character in literal.
*/
! private void scanLitChar(int pos) {
! if (acceptThenPut('\\')) {
! hasEscapeSequences = true;
!
! switch (get()) {
! case '0': case '1': case '2': case '3':
! case '4': case '5': case '6': case '7':
! char leadch = get();
! putThenNext();
!
! if (inRange('0', '7')) {
! putThenNext();
!
! if (leadch <= '3' && inRange('0', '7')) {
! putThenNext();
}
}
+ break;
! case 'b':
! case 't':
! case 'n':
! case 'f':
! case 'r':
! case '\'':
! case '\"':
! case '\\':
! putThenNext();
! break;
!
! case 's':
! checkSourceLevel(position(), Feature.TEXT_BLOCKS);
! putThenNext();
! break;
!
! case '\n':
! case '\r':
! if (isTextBlock) {
! skipLineTerminator();
! // Normalize line terminator.
! put('\n');
! } else {
! lexError(position(), Errors.IllegalEscChar);
! }
! break;
!
! default:
! lexError(position(), Errors.IllegalEscChar);
! break;
! }
! } else {
! putThenNext();
}
}
! /**
! * Scan a string literal or text block.
! *
! * @param pos position of the first character in literal.
*/
private void scanString(int pos) {
! // Assume the best.
tk = Tokens.TokenKind.STRINGLITERAL;
! // Track the end of first line for error recovery.
! int firstEOLN = NOT_FOUND;
! // Check for text block delimiter.
! isTextBlock = accept("\"\"\"");
!
! if (isTextBlock) {
// Check if preview feature is enabled for text blocks.
checkSourceLevel(pos, Feature.TEXT_BLOCKS);
!
// Verify the open delimiter sequence.
! // Error if the open delimiter sequence is not """<white space>*<LineTerminator>.
! skipWhitespace();
!
! if (isEOLN()) {
! skipLineTerminator();
! } else {
! lexError(position(), Errors.IllegalTextBlockOpen);
! return;
}
!
! // While characters are available.
! while (!isEOF()) {
! if (accept("\"\"\"")) {
! return;
}
+
if (isEOLN()) {
skipLineTerminator();
+ // Add normalized line terminator to literal buffer.
+ put('\n');
+
+ // Record first line terminator for error recovery.
+ if (firstEOLN == NOT_FOUND) {
+ firstEOLN = position();
+ }
} else {
! // Add character to string buffer.
! scanLitChar(pos);
}
}
+ } else {
+ // Skip first quote.
+ next();
+
// While characters are available.
! while (!isEOF()) {
! if (accept('\"')) {
return;
}
!
! if (isEOLN()) {
// Line terminator in string literal is an error.
// Fall out to unclosed string literal error.
break;
} else {
// Add character to string buffer.
! scanLitChar(pos);
! }
}
}
+
// String ended without close delimiter sequence.
! lexError(pos, isTextBlock ? Errors.UnclosedTextBlock : Errors.UnclosedStrLit);
!
! if (firstEOLN != NOT_FOUND) {
! // Reset recovery position to point after text block open delimiter sequence.
! reset(firstEOLN);
}
}
+ /**
+ * Scan sequence of digits.
+ *
+ * @param pos position of the first character in literal.
+ * @param digitRadix radix of numeric literal.
+ */
private void scanDigits(int pos, int digitRadix) {
! int leadingUnderscorePos = is('_') ? position() : NOT_FOUND;
! int trailingUnderscorePos;
!
do {
! if (!is('_')) {
! put();
! trailingUnderscorePos = NOT_FOUND;
! } else {
! trailingUnderscorePos = position();
! }
!
! next();
! } while (digit(pos, digitRadix) >= 0 || is('_'));
!
! if (leadingUnderscorePos != NOT_FOUND) {
! lexError(leadingUnderscorePos, Errors.IllegalUnderscore);
! } else if (trailingUnderscorePos != NOT_FOUND) {
! lexError(trailingUnderscorePos, Errors.IllegalUnderscore);
}
}
! /**
! * Read fractional part of hexadecimal floating point number.
! *
! * @param pos position of the first character in literal.
*/
private void scanHexExponentAndSuffix(int pos) {
! if (acceptOneOfThenPut('p', 'P')) {
skipIllegalUnderscores();
! acceptOneOfThenPut('+', '-');
skipIllegalUnderscores();
!
! if (digit(pos, 10) >= 0) {
scanDigits(pos, 10);
! } else {
lexError(pos, Errors.MalformedFpLit);
+ }
} else {
lexError(pos, Errors.MalformedFpLit);
}
!
! if (acceptOneOfThenPut('f', 'F')) {
tk = TokenKind.FLOATLITERAL;
radix = 16;
} else {
! acceptOneOfThenPut('d', 'D');
tk = TokenKind.DOUBLELITERAL;
radix = 16;
}
}
! /**
! * Read fractional part of floating point number.
! *
! * @param pos position of the first character in literal.
*/
private void scanFraction(int pos) {
skipIllegalUnderscores();
!
! if (digit(pos, 10) >= 0) {
scanDigits(pos, 10);
}
!
! int index = sb.length();
!
! if (acceptOneOfThenPut('e', 'E')) {
skipIllegalUnderscores();
! acceptOneOfThenPut('+', '-');
skipIllegalUnderscores();
!
! if (digit(pos, 10) >= 0) {
scanDigits(pos, 10);
return;
}
+
lexError(pos, Errors.MalformedFpLit);
! sb.setLength(index);
}
}
! /**
! * Read fractional part and 'd' or 'f' suffix of floating point number.
! *
! * @param pos position of the first character in literal.
*/
private void scanFractionAndSuffix(int pos) {
radix = 10;
scanFraction(pos);
!
! if (acceptOneOfThenPut('f', 'F')) {
tk = TokenKind.FLOATLITERAL;
} else {
! acceptOneOfThenPut('d', 'D');
tk = TokenKind.DOUBLELITERAL;
}
}
! /**
! * Read fractional part and 'd' or 'f' suffix of hexadecimal floating point number.
! *
! * @param pos position of the first character in literal.
*/
private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
radix = 16;
! Assert.check(is('.'));
! putThenNext();
skipIllegalUnderscores();
!
! if (digit(pos, 16) >= 0) {
seendigit = true;
scanDigits(pos, 16);
}
+
if (!seendigit)
lexError(pos, Errors.InvalidHexNumber);
else
scanHexExponentAndSuffix(pos);
}
+ /**
+ * Skip over underscores and report as a error if found.
+ */
private void skipIllegalUnderscores() {
! if (is('_')) {
! lexError(position(), Errors.IllegalUnderscore);
! skip('_');
}
}
! /**
! * Read a number. (Spec. 3.10)
! *
! * @param pos position of the first character in literal.
! * @param radix the radix of the number; one of 2, 8, 10, 16.
*/
private void scanNumber(int pos, int radix) {
// for octal, allow base-10 digit in case it's a float literal
this.radix = radix;
int digitRadix = (radix == 8 ? 10 : radix);
! int firstDigit = digit(pos, Math.max(10, digitRadix));
boolean seendigit = firstDigit >= 0;
boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
+
if (seendigit) {
scanDigits(pos, digitRadix);
}
!
! if (radix == 16 && is('.')) {
scanHexFractionAndSuffix(pos, seendigit);
! } else if (seendigit && radix == 16 && isOneOf('p', 'P')) {
scanHexExponentAndSuffix(pos);
! } else if (digitRadix == 10 && is('.')) {
! putThenNext();
scanFractionAndSuffix(pos);
! } else if (digitRadix == 10 && isOneOf('e', 'E', 'f', 'F', 'd', 'D')) {
scanFractionAndSuffix(pos);
} else {
if (!seenValidDigit) {
switch (radix) {
case 2:
*** 643,669 ****
case 16:
lexError(pos, Errors.InvalidHexNumber);
break;
}
}
! if (reader.ch == 'l' || reader.ch == 'L') {
! reader.scanChar();
tk = TokenKind.LONGLITERAL;
} else {
tk = TokenKind.INTLITERAL;
}
}
}
! /** Read an identifier.
*/
private void scanIdent() {
! boolean isJavaIdentifierPart;
! char high;
! reader.putChar(true);
do {
! switch (reader.ch) {
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
--- 636,670 ----
case 16:
lexError(pos, Errors.InvalidHexNumber);
break;
}
}
!
! if (acceptOneOf('l', 'L')) {
tk = TokenKind.LONGLITERAL;
} else {
tk = TokenKind.INTLITERAL;
}
}
}
! /**
! * Determines if the sequence in the literal buffer is a token (keyword, operator.)
! */
! private void checkIdent() {
! name = names.fromString(sb.toString());
! tk = tokens.lookupKind(name);
! }
!
! /**
! * Read an identifier. (Spec. 3.8)
*/
private void scanIdent() {
! putThenNext();
!
do {
! switch (get()) {
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
*** 676,798 ****
case 'z':
case '$': case '_':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
break;
case '\u0000': case '\u0001': case '\u0002': case '\u0003':
case '\u0004': case '\u0005': case '\u0006': case '\u0007':
case '\u0008': case '\u000E': case '\u000F': case '\u0010':
case '\u0011': case '\u0012': case '\u0013': case '\u0014':
case '\u0015': case '\u0016': case '\u0017':
case '\u0018': case '\u0019': case '\u001B':
case '\u007F':
! reader.scanChar();
continue;
case '\u001A': // EOI is also a legal identifier part
! if (reader.bp >= reader.buflen) {
! name = reader.name();
! tk = tokens.lookupKind(name);
return;
}
! reader.scanChar();
continue;
default:
! if (reader.ch < '\u0080') {
// all ASCII range chars already handled, above
isJavaIdentifierPart = false;
} else {
! if (Character.isIdentifierIgnorable(reader.ch)) {
! reader.scanChar();
continue;
- } else {
- int codePoint = reader.peekSurrogates();
- if (codePoint >= 0) {
- if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
- reader.putChar(true);
- }
- } else {
- isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
- }
}
}
if (!isJavaIdentifierPart) {
! name = reader.name();
! tk = tokens.lookupKind(name);
return;
}
}
! reader.putChar(true);
} while (true);
}
! /** Return true if reader.ch can be part of an operator.
*/
private boolean isSpecial(char ch) {
switch (ch) {
case '!': case '%': case '&': case '*': case '?':
case '+': case '-': case ':': case '<': case '=':
case '>': case '^': case '|': case '~':
case '@':
return true;
default:
return false;
}
}
! /** Read longest possible sequence of special characters and convert
! * to token.
*/
private void scanOperator() {
while (true) {
! reader.putChar(false);
! Name newname = reader.name();
! TokenKind tk1 = tokens.lookupKind(newname);
! if (tk1 == TokenKind.IDENTIFIER) {
! reader.sp--;
break;
}
- tk = tk1;
- reader.scanChar();
- if (!isSpecial(reader.ch)) break;
}
}
! /** Read token.
*/
public Token readToken() {
!
! reader.sp = 0;
name = null;
radix = 0;
! int pos = 0;
! int endPos = 0;
List<Comment> comments = null;
try {
loop: while (true) {
! pos = reader.bp;
! switch (reader.ch) {
case ' ': // (Spec 3.6)
case '\t': // (Spec 3.6)
! case FF: // (Spec 3.6)
! do {
! reader.scanChar();
! } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
! processWhiteSpace(pos, reader.bp);
break;
! case LF: // (Spec 3.4)
! reader.scanChar();
! processLineTerminator(pos, reader.bp);
break;
! case CR: // (Spec 3.4)
! reader.scanChar();
! if (reader.ch == LF) {
! reader.scanChar();
! }
! processLineTerminator(pos, reader.bp);
break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
--- 677,811 ----
case 'z':
case '$': case '_':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
break;
+
case '\u0000': case '\u0001': case '\u0002': case '\u0003':
case '\u0004': case '\u0005': case '\u0006': case '\u0007':
case '\u0008': case '\u000E': case '\u000F': case '\u0010':
case '\u0011': case '\u0012': case '\u0013': case '\u0014':
case '\u0015': case '\u0016': case '\u0017':
case '\u0018': case '\u0019': case '\u001B':
case '\u007F':
! next();
continue;
+
case '\u001A': // EOI is also a legal identifier part
! if (isEOF()) {
! checkIdent();
return;
}
!
! next();
continue;
+
default:
! boolean isJavaIdentifierPart;
!
! if (isASCII()) {
// all ASCII range chars already handled, above
isJavaIdentifierPart = false;
} else {
! if (Character.isIdentifierIgnorable(get())) {
! next();
continue;
}
+
+ isJavaIdentifierPart = isSurrogate()
+ ? Character.isJavaIdentifierPart(getCodepoint())
+ : Character.isJavaIdentifierPart(get());
}
+
if (!isJavaIdentifierPart) {
! checkIdent();
return;
}
}
!
! putThenNext();
} while (true);
}
! /**
! * Return true if ch can be part of an operator.
! *
! * @param ch character to check.
! *
! * @return true if ch can be part of an operator.
*/
private boolean isSpecial(char ch) {
switch (ch) {
case '!': case '%': case '&': case '*': case '?':
case '+': case '-': case ':': case '<': case '=':
case '>': case '^': case '|': case '~':
case '@':
return true;
+
default:
return false;
}
}
! /**
! * Read longest possible sequence of special characters and convert to token.
*/
private void scanOperator() {
while (true) {
! put();
! TokenKind newtk = tokens.lookupKind(sb.toString());
!
! if (newtk == TokenKind.IDENTIFIER) {
! sb.setLength(sb.length() - 1);
! break;
! }
!
! tk = newtk;
! next();
!
! if (!isSpecial(get())) {
break;
}
}
}
! /**
! * Read token (main entrypoint.)
*/
public Token readToken() {
! sb.setLength(0);
name = null;
radix = 0;
+ isTextBlock = false;
+ hasEscapeSequences = false;
! int pos;
List<Comment> comments = null;
try {
loop: while (true) {
! pos = position();
!
! switch (get()) {
case ' ': // (Spec 3.6)
case '\t': // (Spec 3.6)
! case '\f': // (Spec 3.6)
! skipWhitespace();
! processWhiteSpace(pos, position());
break;
!
! case '\n': // (Spec 3.4)
! next();
! processLineTerminator(pos, position());
break;
!
! case '\r': // (Spec 3.4)
! next();
! accept('\n');
! processLineTerminator(pos, position());
break;
+
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
*** 801,996 ****
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
! case '$': case '_':
scanIdent();
break loop;
! case '0':
! reader.scanChar();
! if (reader.ch == 'x' || reader.ch == 'X') {
! reader.scanChar();
skipIllegalUnderscores();
scanNumber(pos, 16);
! } else if (reader.ch == 'b' || reader.ch == 'B') {
! reader.scanChar();
skipIllegalUnderscores();
scanNumber(pos, 2);
} else {
! reader.putChar('0');
! if (reader.ch == '_') {
! int savePos = reader.bp;
! do {
! reader.scanChar();
! } while (reader.ch == '_');
! if (reader.digit(pos, 10) < 0) {
lexError(savePos, Errors.IllegalUnderscore);
}
}
scanNumber(pos, 8);
}
break loop;
case '1': case '2': case '3': case '4':
! case '5': case '6': case '7': case '8': case '9':
scanNumber(pos, 10);
break loop;
! case '.':
! reader.scanChar();
! if (reader.digit(pos, 10) >= 0) {
! reader.putChar('.');
! scanFractionAndSuffix(pos);
! } else if (reader.ch == '.') {
! int savePos = reader.bp;
! reader.putChar('.'); reader.putChar('.', true);
! if (reader.ch == '.') {
! reader.scanChar();
! reader.putChar('.');
tk = TokenKind.ELLIPSIS;
} else {
lexError(savePos, Errors.IllegalDot);
! }
} else {
tk = TokenKind.DOT;
}
break loop;
! case ',':
! reader.scanChar(); tk = TokenKind.COMMA; break loop;
! case ';':
! reader.scanChar(); tk = TokenKind.SEMI; break loop;
! case '(':
! reader.scanChar(); tk = TokenKind.LPAREN; break loop;
! case ')':
! reader.scanChar(); tk = TokenKind.RPAREN; break loop;
! case '[':
! reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
! case ']':
! reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
! case '{':
! reader.scanChar(); tk = TokenKind.LBRACE; break loop;
! case '}':
! reader.scanChar(); tk = TokenKind.RBRACE; break loop;
case '/':
! reader.scanChar();
! if (reader.ch == '/') {
! do {
! reader.scanCommentChar();
! } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
! if (reader.bp < reader.buflen) {
! comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
}
break;
! } else if (reader.ch == '*') {
boolean isEmpty = false;
- reader.scanChar();
CommentStyle style;
! if (reader.ch == '*') {
style = CommentStyle.JAVADOC;
! reader.scanCommentChar();
! if (reader.ch == '/') {
isEmpty = true;
}
} else {
style = CommentStyle.BLOCK;
}
! while (!isEmpty && reader.bp < reader.buflen) {
! if (reader.ch == '*') {
! reader.scanChar();
! if (reader.ch == '/') break;
} else {
! reader.scanCommentChar();
}
}
! if (reader.ch == '/') {
! reader.scanChar();
! comments = addComment(comments, processComment(pos, reader.bp, style));
break;
} else {
lexError(pos, Errors.UnclosedComment);
break loop;
}
! } else if (reader.ch == '=') {
! tk = TokenKind.SLASHEQ;
! reader.scanChar();
} else {
! tk = TokenKind.SLASH;
}
break loop;
! case '\'':
! reader.scanChar();
! if (reader.ch == '\'') {
lexError(pos, Errors.EmptyCharLit);
- reader.scanChar();
} else {
! if (isEOLN())
lexError(pos, Errors.IllegalLineEndInCharLit);
! scanLitChar(pos, true, false);
! if (reader.ch == '\'') {
! reader.scanChar();
tk = TokenKind.CHARLITERAL;
} else {
lexError(pos, Errors.UnclosedCharLit);
}
}
break loop;
! case '\"':
scanString(pos);
break loop;
default:
! if (isSpecial(reader.ch)) {
scanOperator();
} else {
boolean isJavaIdentifierStart;
! int codePoint = -1;
! if (reader.ch < '\u0080') {
// all ASCII range chars already handled, above
isJavaIdentifierStart = false;
} else {
! codePoint = reader.peekSurrogates();
! if (codePoint >= 0) {
! if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
! reader.putChar(true);
! }
! } else {
! isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
! }
}
if (isJavaIdentifierStart) {
scanIdent();
! } else if (reader.digit(pos, 10) >= 0) {
scanNumber(pos, 10);
! } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
tk = TokenKind.EOF;
! pos = reader.realLength;
} else {
String arg;
! if (codePoint >= 0) {
! char high = reader.ch;
! reader.scanChar();
! arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
! } else {
! arg = (32 < reader.ch && reader.ch < 127) ?
! String.format("%s", reader.ch) :
! String.format("\\u%04x", (int)reader.ch);
}
lexError(pos, Errors.IllegalChar(arg));
! reader.scanChar();
}
}
break loop;
}
}
! endPos = reader.bp;
! switch (tk.tag) {
! case DEFAULT: return new Token(tk, pos, endPos, comments);
! case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
! case STRING: {
// Get characters from string buffer.
! String string = reader.chars();
// If a text block.
! if (isTextBlock && TextBlockSupport.hasSupport()) {
// Verify that the incidental indentation is consistent.
if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
Set<TextBlockSupport.WhitespaceChecks> checks =
TextBlockSupport.checkWhitespace(string);
if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
--- 814,1048 ----
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
! case '$': case '_': // (Spec. 3.8)
scanIdent();
break loop;
!
! case '0': // (Spec. 3.10)
! next();
!
! if (acceptOneOf('x', 'X')) {
skipIllegalUnderscores();
scanNumber(pos, 16);
! } else if (acceptOneOf('b', 'B')) {
skipIllegalUnderscores();
scanNumber(pos, 2);
} else {
! put('0');
!
! if (is('_')) {
! int savePos = position();
! skip('_');
!
! if (digit(pos, 10) < 0) {
lexError(savePos, Errors.IllegalUnderscore);
}
}
+
scanNumber(pos, 8);
}
break loop;
+
case '1': case '2': case '3': case '4':
! case '5': case '6': case '7': case '8': case '9': // (Spec. 3.10)
scanNumber(pos, 10);
break loop;
!
! case '.': // (Spec. 3.12)
! if (accept("...")) {
! put("...");
tk = TokenKind.ELLIPSIS;
} else {
+ next();
+ int savePos = position();
+
+ if (accept('.')) {
lexError(savePos, Errors.IllegalDot);
! } else if (digit(pos, 10) >= 0) {
! put('.');
! scanFractionAndSuffix(pos); // (Spec. 3.10)
} else {
tk = TokenKind.DOT;
}
+ }
+ break loop;
+
+ case ',': // (Spec. 3.12)
+ next();
+ tk = TokenKind.COMMA;
+ break loop;
+
+ case ';': // (Spec. 3.12)
+ next();
+ tk = TokenKind.SEMI;
break loop;
!
! case '(': // (Spec. 3.12)
! next();
! tk = TokenKind.LPAREN;
! break loop;
!
! case ')': // (Spec. 3.12)
! next();
! tk = TokenKind.RPAREN;
! break loop;
!
! case '[': // (Spec. 3.12)
! next();
! tk = TokenKind.LBRACKET;
! break loop;
!
! case ']': // (Spec. 3.12)
! next();
! tk = TokenKind.RBRACKET;
! break loop;
!
! case '{': // (Spec. 3.12)
! next();
! tk = TokenKind.LBRACE;
! break loop;
!
! case '}': // (Spec. 3.12)
! next();
! tk = TokenKind.RBRACE;
! break loop;
!
case '/':
! next();
!
! if (accept('/')) { // (Spec. 3.7)
! skipToEOLN();
!
! if (!isEOF()) {
! comments = appendComment(comments, processComment(pos, position(), CommentStyle.LINE));
}
break;
! } else if (accept('*')) { // (Spec. 3.7)
boolean isEmpty = false;
CommentStyle style;
!
! if (accept('*')) {
style = CommentStyle.JAVADOC;
!
! if (is('/')) {
isEmpty = true;
}
} else {
style = CommentStyle.BLOCK;
}
!
! if (!isEmpty) {
! while (!isEOF()) {
! if (accept('*')) {
! if (is('/')) {
! break;
! }
} else {
! next();
! }
}
}
!
! if (accept('/')) {
! comments = appendComment(comments, processComment(pos, position(), style));
!
break;
} else {
lexError(pos, Errors.UnclosedComment);
+
break loop;
}
! } else if (accept('=')) {
! tk = TokenKind.SLASHEQ; // (Spec. 3.12)
} else {
! tk = TokenKind.SLASH; // (Spec. 3.12)
}
break loop;
!
! case '\'': // (Spec. 3.10)
! next();
!
! if (accept('\'')) {
lexError(pos, Errors.EmptyCharLit);
} else {
! if (isEOLN()) {
lexError(pos, Errors.IllegalLineEndInCharLit);
! }
!
! scanLitChar(pos);
!
! if (accept('\'')) {
tk = TokenKind.CHARLITERAL;
} else {
lexError(pos, Errors.UnclosedCharLit);
}
}
break loop;
!
! case '\"': // (Spec. 3.10)
scanString(pos);
break loop;
+
default:
! if (isSpecial(get())) {
scanOperator();
} else {
boolean isJavaIdentifierStart;
!
! if (isASCII()) {
// all ASCII range chars already handled, above
isJavaIdentifierStart = false;
} else {
! isJavaIdentifierStart = isSurrogate()
! ? Character.isJavaIdentifierStart(getCodepoint())
! : Character.isJavaIdentifierStart(get());
}
+
if (isJavaIdentifierStart) {
scanIdent();
! } else if (digit(pos, 10) >= 0) {
scanNumber(pos, 10);
! } else if (is((char)EOI) || isEOF()) {
tk = TokenKind.EOF;
! pos = position();
} else {
String arg;
! if (isSurrogate()) {
! int codePoint = getCodepoint();
! char hi = Character.highSurrogate(codePoint);
! char lo = Character.lowSurrogate(codePoint);
! arg = String.format("\\u%04x\\u%04x", (int) hi, (int) lo);
! } else {
! char ch = get();
! arg = (32 < ch && ch < 127) ? String.format("%s", ch) :
! String.format("\\u%04x", (int) ch);
}
+
lexError(pos, Errors.IllegalChar(arg));
! next();
}
}
break loop;
}
}
!
! int endPos = position();
!
! if (tk.tag == Token.Tag.DEFAULT) {
! return new Token(tk, pos, endPos, comments);
! } else if (tk.tag == Token.Tag.NAMED) {
! return new NamedToken(tk, pos, endPos, name, comments);
! } else {
// Get characters from string buffer.
! String string = sb.toString();
!
// If a text block.
! if (isTextBlock) {
// Verify that the incidental indentation is consistent.
if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
Set<TextBlockSupport.WhitespaceChecks> checks =
TextBlockSupport.checkWhitespace(string);
if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
*** 1002,1219 ****
Warnings.TrailingWhiteSpaceWillBeRemoved);
}
}
// Remove incidental indentation.
try {
! string = TextBlockSupport.stripIndent(string);
} catch (Exception ex) {
// Error already reported, just use unstripped string.
}
}
// Translate escape sequences if present.
! if (hasEscapeSequences && TextBlockSupport.hasSupport()) {
try {
! string = TextBlockSupport.translateEscapes(string);
} catch (Exception ex) {
// Error already reported, just use untranslated string.
}
}
// Build string token.
return new StringToken(tk, pos, endPos, string, comments);
! }
! case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
! default: throw new AssertionError();
}
}
! finally {
if (scannerDebug) {
System.out.println("nextToken(" + pos
+ "," + endPos + ")=|" +
! new String(reader.getRawCharacters(pos, endPos))
+ "|");
}
}
}
! //where
! List<Comment> addComment(List<Comment> comments, Comment comment) {
return comments == null ?
List.of(comment) :
comments.prepend(comment);
}
! /** Return the position where a lexical error occurred;
*/
public int errPos() {
return errPos;
}
! /** Set the position where a lexical error occurred;
*/
public void errPos(int pos) {
errPos = pos;
}
/**
* Called when a complete comment has been scanned. pos and endPos
* will mark the comment boundary.
*/
protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
! if (scannerDebug)
System.out.println("processComment(" + pos
+ "," + endPos + "," + style + ")=|"
! + new String(reader.getRawCharacters(pos, endPos))
+ "|");
! char[] buf = reader.getRawCharacters(pos, endPos);
! return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
}
/**
* Called when a complete whitespace run has been scanned. pos and endPos
* will mark the whitespace boundary.
*/
protected void processWhiteSpace(int pos, int endPos) {
! if (scannerDebug)
System.out.println("processWhitespace(" + pos
+ "," + endPos + ")=|" +
! new String(reader.getRawCharacters(pos, endPos))
+ "|");
}
/**
* Called when a line terminator has been processed.
*/
protected void processLineTerminator(int pos, int endPos) {
! if (scannerDebug)
System.out.println("processTerminator(" + pos
+ "," + endPos + ")=|" +
! new String(reader.getRawCharacters(pos, endPos))
+ "|");
}
! /** Build a map for translating between line numbers and
! * positions in the input.
*
! * @return a LineMap */
public Position.LineMap getLineMap() {
! return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
}
-
/**
* Scan a documentation comment; determine if a deprecated tag is present.
* Called once the initial /, * have been skipped, positioned at the second *
* (which is treated as the beginning of the first line).
* Stops positioned at the closing '/'.
*/
! protected static class BasicComment<U extends UnicodeReader> implements Comment {
!
CommentStyle cs;
- U comment_reader;
protected boolean deprecatedFlag = false;
protected boolean scanned = false;
! protected BasicComment(U comment_reader, CommentStyle cs) {
! this.comment_reader = comment_reader;
this.cs = cs;
}
public String getText() {
return null;
}
public int getSourcePos(int pos) {
return -1;
}
public CommentStyle getStyle() {
return cs;
}
public boolean isDeprecated() {
if (!scanned && cs == CommentStyle.JAVADOC) {
scanDocComment();
}
return deprecatedFlag;
}
! @SuppressWarnings("fallthrough")
protected void scanDocComment() {
try {
boolean deprecatedPrefix = false;
!
! comment_reader.bp += 3; // '/**'
! comment_reader.ch = comment_reader.buf[comment_reader.bp];
forEachLine:
! while (comment_reader.bp < comment_reader.buflen) {
!
// Skip optional WhiteSpace at beginning of line
! while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
! comment_reader.scanCommentChar();
! }
// Skip optional consecutive Stars
! while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
! comment_reader.scanCommentChar();
! if (comment_reader.ch == '/') {
return;
}
}
// Skip optional WhiteSpace after Stars
! while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
! comment_reader.scanCommentChar();
! }
- deprecatedPrefix = false;
// At beginning of line in the JavaDoc sense.
! if (!deprecatedFlag) {
! String deprecated = "@deprecated";
! int i = 0;
! while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
! comment_reader.scanCommentChar();
! i++;
! if (i == deprecated.length()) {
! deprecatedPrefix = true;
! break;
! }
! }
! }
! if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
! if (Character.isWhitespace(comment_reader.ch)) {
deprecatedFlag = true;
! } else if (comment_reader.ch == '*') {
! comment_reader.scanCommentChar();
! if (comment_reader.ch == '/') {
deprecatedFlag = true;
return;
}
}
}
// Skip rest of line
! while (comment_reader.bp < comment_reader.buflen) {
! switch (comment_reader.ch) {
case '*':
! comment_reader.scanCommentChar();
! if (comment_reader.ch == '/') {
return;
}
break;
! case CR: // (Spec 3.4)
! comment_reader.scanCommentChar();
! if (comment_reader.ch != LF) {
! continue forEachLine;
! }
! /* fall through to LF case */
! case LF: // (Spec 3.4)
! comment_reader.scanCommentChar();
continue forEachLine;
default:
! comment_reader.scanCommentChar();
}
} // rest of line
} // forEachLine
return;
} finally {
--- 1054,1335 ----
Warnings.TrailingWhiteSpaceWillBeRemoved);
}
}
// Remove incidental indentation.
try {
! string = string.stripIndent();
} catch (Exception ex) {
// Error already reported, just use unstripped string.
}
}
+
// Translate escape sequences if present.
! if (hasEscapeSequences) {
try {
! string = string.translateEscapes();
} catch (Exception ex) {
// Error already reported, just use untranslated string.
}
}
+
+ if (tk.tag == Token.Tag.STRING) {
// Build string token.
return new StringToken(tk, pos, endPos, string, comments);
! } else {
! // Build numeric token.
! return new NumericToken(tk, pos, endPos, string, radix, comments);
}
}
! } finally {
! int endPos = position();
!
if (scannerDebug) {
System.out.println("nextToken(" + pos
+ "," + endPos + ")=|" +
! new String(getRawCharacters(pos, endPos))
+ "|");
}
}
}
!
! /**
! * Appends a comment to the list of comments preceding the current token.
! *
! * @param comments existing list of comments.
! * @param comment comment to append.
! *
! * @return new list with comment prepended to the existing list.
! */
! List<Comment> appendComment(List<Comment> comments, Comment comment) {
return comments == null ?
List.of(comment) :
comments.prepend(comment);
}
! /**
! * Return the position where a lexical error occurred.
! *
! * @return position in the input buffer of where the error occurred.
*/
public int errPos() {
return errPos;
}
! /**
! * Set the position where a lexical error occurred.
! *
! * @param pos position in the input buffer of where the error occurred.
*/
public void errPos(int pos) {
errPos = pos;
}
/**
* Called when a complete comment has been scanned. pos and endPos
* will mark the comment boundary.
+ *
+ * @param pos position of the opening / in the input buffer.
+ * @param endPos position + 1 of the closing / in the input buffer.
+ * @param style style of comment.
+ *
+ * @return the constructed BasicComment.
*/
protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
! if (scannerDebug) {
System.out.println("processComment(" + pos
+ "," + endPos + "," + style + ")=|"
! + new String(getRawCharacters(pos, endPos))
+ "|");
! }
!
! char[] buf = getRawCharacters(pos, endPos);
!
! return new BasicComment(style, fac, buf, pos);
}
/**
* Called when a complete whitespace run has been scanned. pos and endPos
* will mark the whitespace boundary.
+ *
+ * (Spec 3.6)
+ *
+ * @param pos position in input buffer of first whitespace character.
+ * @param endPos position + 1 in input buffer of last whitespace character.
*/
protected void processWhiteSpace(int pos, int endPos) {
! if (scannerDebug) {
System.out.println("processWhitespace(" + pos
+ "," + endPos + ")=|" +
! new String(getRawCharacters(pos, endPos))
+ "|");
}
+ }
/**
* Called when a line terminator has been processed.
+ *
+ * @param pos position in input buffer of first character in sequence.
+ * @param endPos position + 1 in input buffer of last character in sequence.
*/
protected void processLineTerminator(int pos, int endPos) {
! if (scannerDebug) {
System.out.println("processTerminator(" + pos
+ "," + endPos + ")=|" +
! new String(getRawCharacters(pos, endPos))
+ "|");
}
+ }
! /**
! * Build a map for translating between line numbers and positions in the input.
*
! * @return a LineMap
! */
public Position.LineMap getLineMap() {
! return Position.makeLineMap(getRawCharacters(), length(), false);
}
/**
* Scan a documentation comment; determine if a deprecated tag is present.
* Called once the initial /, * have been skipped, positioned at the second *
* (which is treated as the beginning of the first line).
* Stops positioned at the closing '/'.
*/
! protected static class BasicComment extends PositionTrackingReader implements Comment {
! /**
! * Style of comment
! * LINE starting with //
! * BLOCK starting with /*
! * JAVADOC starting with /**
! */
CommentStyle cs;
+ /**
+ * true if comment contains @deprecated at beginning of a line.
+ */
protected boolean deprecatedFlag = false;
+
+ /**
+ * true if comment has been fully scanned.
+ */
protected boolean scanned = false;
! /**
! * Constructor.
! *
! * @param cs comment style
! * @param sf Scan factory.
! * @param array Array containing contents of source.
! * @param offset Position offset in original source buffer.
! */
! protected BasicComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
! super(sf, array, offset);
this.cs = cs;
}
+ /**
+ * Return comment body text minus comment adornments or null if not scanned.
+ *
+ * @return comment body text.
+ */
public String getText() {
return null;
}
+ /**
+ * Return buffer position in original buffer mapped from buffer position in comment.
+ *
+ * @param pos buffer position in comment.
+ *
+ * @return buffer position in original buffer.
+ */
public int getSourcePos(int pos) {
return -1;
}
+ /**
+ * Return style of comment.
+ * LINE starting with //
+ * BLOCK starting with /*
+ * JAVADOC starting with /**
+ *
+ * @return
+ */
public CommentStyle getStyle() {
return cs;
}
+ /**
+ * true if comment contains @deprecated at beginning of a line.
+ *
+ * @return true if comment contains @deprecated.
+ */
public boolean isDeprecated() {
if (!scanned && cs == CommentStyle.JAVADOC) {
scanDocComment();
}
+
return deprecatedFlag;
}
! /**
! * Scan JAVADOC comment for details.
! */
protected void scanDocComment() {
try {
boolean deprecatedPrefix = false;
! accept("/**");
forEachLine:
! while (!isEOF()) {
// Skip optional WhiteSpace at beginning of line
! skipWhitespace();
// Skip optional consecutive Stars
! while (accept('*')) {
! if (is('/')) {
return;
}
}
// Skip optional WhiteSpace after Stars
! skipWhitespace();
// At beginning of line in the JavaDoc sense.
! deprecatedPrefix = deprecatedFlag || accept("@deprecated");
! if (deprecatedPrefix && !isEOF()) {
! if (Character.isWhitespace(get())) {
deprecatedFlag = true;
! } else if (accept('*')) {
! if (is('/')) {
deprecatedFlag = true;
return;
}
}
}
// Skip rest of line
! while (!isEOF()) {
! switch (get()) {
case '*':
! next();
!
! if (is('/')) {
return;
}
+
break;
! case '\r': // (Spec 3.4)
! case '\n': // (Spec 3.4)
! accept('\r');
! accept('\n');
continue forEachLine;
+
default:
! next();
! break;
}
} // rest of line
} // forEachLine
return;
} finally {
< prev index next >