/* * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package com.sun.tools.javac.parser; import com.sun.tools.javac.code.Preview; import com.sun.tools.javac.code.Source; import com.sun.tools.javac.code.Source.Feature; import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; import com.sun.tools.javac.resources.CompilerProperties.Errors; import com.sun.tools.javac.util.*; import com.sun.tools.javac.util.JCDiagnostic.DiagnosticFlag; import java.nio.CharBuffer; import static com.sun.tools.javac.parser.Tokens.*; import static com.sun.tools.javac.util.LayoutCharacters.*; /** The lexical analyzer maps an input stream consisting of * ASCII characters and Unicode escapes into a token sequence. * *
<p><b>This is NOT part of any supported API.
 * If you write code that depends on this, you do so at your own risk.
 * This code and its internal interfaces are subject to change or
 * deletion without notice.</b>
*/
public class JavaTokenizer {
/** Debugging switch for the scanner; a compile-time constant fixed to false.
*/
private static final boolean scannerDebug = false;
/** The source language setting, taken from the scanner factory in the constructor.
*/
private Source source;
/** The preview language setting, taken from the scanner factory in the constructor. */
private Preview preview;
/** The log to be used for error reporting.
*/
private final Log log;
/** The token factory; used to map scanned names to token kinds. */
private final Tokens tokens;
/** The token kind, set by nextToken().
*  Also forced to TokenKind.ERROR by lexError().
*/
protected TokenKind tk;
/** The token's radix, set by nextToken().
*  The number-scanning methods assign 2, 8, 10 or 16 here.
*/
protected int radix;
/** The token's name, set by nextToken().
*/
protected Name name;
/** The position where the most recent lexical error was reported through
*  lexError(); Position.NOPOS until an error has been reported.
*/
protected int errPos = Position.NOPOS;
/** The Unicode reader (low-level stream reader).
*/
protected UnicodeReader reader;
/** The scanner factory handed to the constructor. */
protected ScannerFactory fac;
/** True when the running platform's Float.valueOf accepts hexadecimal
 *  floating point literals; probed once when the class is initialized.
 */
private static final boolean hexFloatsWork = hexFloatsWork();

/** Probe Float.valueOf with a small hexadecimal floating point literal.
 *  @return true if the literal parses, false if parsing is rejected with
 *          a NumberFormatException
 */
private static boolean hexFloatsWork() {
    boolean parses;
    try {
        Float.valueOf("0x1.0p1");
        parses = true;
    } catch (NumberFormatException ex) {
        parses = false;
    }
    return parses;
}
/**
* Create a tokenizer that reads from a character buffer. The buffer is
* wrapped in a new {@link UnicodeReader} and construction is delegated to
* the reader-based constructor.
*
* NOTE(review): the previous wording here described an input array that
* might be modified and an {@code inputLength} bound; that text appears to
* belong to the {@code char[]} overload rather than this one -- confirm.
*
* @param fac the factory which created this Scanner
* @param buf the input, might be modified
*/
protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
this(fac, new UnicodeReader(fac, buf));
}
/**
* Create a tokenizer that reads from a character array. The array and
* length are handed to a new {@link UnicodeReader} and construction is
* delegated to the reader-based constructor.
*
* @param fac the factory which created this Scanner
* @param buf the input array
* @param inputLength passed through to the reader; presumably the number
*        of valid characters in {@code buf} -- confirm against UnicodeReader
*/
protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
this(fac, new UnicodeReader(fac, buf, inputLength));
}
/**
 * Create a tokenizer on top of an existing reader. The log, token factory,
 * source level and preview settings are all copied from the factory.
 *
 * @param fac the factory which created this Scanner
 * @param reader the low-level reader that supplies the characters
 */
protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
    this.reader = reader;
    this.fac = fac;
    this.source = fac.source;
    this.preview = fac.preview;
    this.tokens = fac.tokens;
    this.log = fac.log;
}
/**
 * Check that a language feature may be used at the current source level.
 * Reports an error for a preview feature when preview is not enabled, or
 * for a feature the source level does not allow; otherwise issues a
 * preview warning if the feature is a preview feature.
 *
 * @param pos the position to report against
 * @param feature the feature being used
 */
protected void checkSourceLevel(int pos, Feature feature) {
    if (preview.isPreview(feature) && !preview.isEnabled()) {
        //preview feature without --preview flag, error
        lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
        return;
    }
    if (!feature.allowedInSource(source)) {
        //incompatible source level, error
        lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
        return;
    }
    if (preview.isPreview(feature)) {
        //use of preview feature, warn
        preview.warnPreview(pos, feature);
    }
}
/** Report a lexical error at the given position. The error is logged,
 *  the position is remembered in errPos and the current token kind
 *  becomes TokenKind.ERROR.
 *  @param pos the position of the error
 *  @param key the diagnostic to report
 */
protected void lexError(int pos, JCDiagnostic.Error key) {
    log.error(pos, key);
    errPos = pos;
    tk = TokenKind.ERROR;
}
/** Report a lexical error at the given position, tagging the diagnostic
 *  with a flag. The error is logged, the position is remembered in errPos
 *  and the current token kind becomes TokenKind.ERROR.
 *  @param flags the diagnostic flag passed on to the log
 *  @param pos the position of the error
 *  @param key the diagnostic to report
 */
protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
    log.error(flags, pos, key);
    errPos = pos;
    tk = TokenKind.ERROR;
}
/** Read the next character of a character or string literal, decoding an
 *  escape sequence when there is one, and hand the result to the reader
 *  via putChar.
 *  @param pos position passed on to reader.digit when decoding octal escapes
 */
private void scanLitChar(int pos) {
    if (reader.ch != '\\') {
        // Ordinary character: copy it, unless the end of the buffer was reached.
        if (reader.bp != reader.buflen) {
            reader.putChar(true);
        }
        return;
    }

    if (reader.peekChar() == '\\' && !reader.isUnicode()) {
        // Two backslashes in a row: an escaped backslash.
        reader.skipChar();
        reader.putChar('\\', true);
        return;
    }

    reader.scanChar();
    switch (reader.ch) {
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7': {
        // Octal escape: one or two digits, or three when the first is at most '3'.
        char first = reader.ch;
        int value = reader.digit(pos, 8);
        reader.scanChar();
        if ('0' <= reader.ch && reader.ch <= '7') {
            value = value * 8 + reader.digit(pos, 8);
            reader.scanChar();
            if (first <= '3' && '0' <= reader.ch && reader.ch <= '7') {
                value = value * 8 + reader.digit(pos, 8);
                reader.scanChar();
            }
        }
        reader.putChar((char)value);
        break;
    }
    case 'b':
        reader.putChar('\b', true);
        break;
    case 't':
        reader.putChar('\t', true);
        break;
    case 'n':
        reader.putChar('\n', true);
        break;
    case 'f':
        reader.putChar('\f', true);
        break;
    case 'r':
        reader.putChar('\r', true);
        break;
    case '\'':
        reader.putChar('\'', true);
        break;
    case '\"':
        reader.putChar('\"', true);
        break;
    case '\\':
        reader.putChar('\\', true);
        break;
    default:
        lexError(reader.bp, Errors.IllegalEscChar);
    }
}
/** Scan a run of digits and underscores, starting with the current
 *  character. Underscores are not copied via putChar; every other
 *  character of the run is. An IllegalUnderscore error is reported if
 *  the last character of the run is an underscore.
 *  @param pos position passed on to reader.digit
 *  @param digitRadix the radix used to recognize digits
 */
private void scanDigits(int pos, int digitRadix) {
    char lastCh;
    int lastPos;
    while (true) {
        if (reader.ch != '_') {
            reader.putChar(false);
        }
        lastCh = reader.ch;
        lastPos = reader.bp;
        reader.scanChar();
        // Stop once the next character is neither a digit nor an underscore.
        if (reader.digit(pos, digitRadix) < 0 && reader.ch != '_') {
            break;
        }
    }
    if (lastCh == '_') {
        lexError(lastPos, Errors.IllegalUnderscore);
    }
}
/** Read the binary exponent ('p' or 'P', optional sign, decimal digits)
 *  and the optional 'f' or 'd' suffix of a hexadecimal floating point
 *  number. A missing exponent or missing exponent digits is reported as
 *  a malformed literal. The token kind is set to FLOATLITERAL or
 *  DOUBLELITERAL and the radix to 16 in every case.
 *  @param pos position used for error reporting and passed to reader.digit
 */
private void scanHexExponentAndSuffix(int pos) {
    if (reader.ch != 'p' && reader.ch != 'P') {
        lexError(pos, Errors.MalformedFpLit);
    } else {
        reader.putChar(true);
        skipIllegalUnderscores();
        if (reader.ch == '+' || reader.ch == '-') {
            reader.putChar(true);
        }
        skipIllegalUnderscores();
        if (reader.digit(pos, 10) < 0) {
            lexError(pos, Errors.MalformedFpLit);
        } else {
            scanDigits(pos, 10);
            if (!hexFloatsWork) {
                lexError(pos, Errors.UnsupportedCrossFpLit);
            }
        }
    }

    switch (reader.ch) {
    case 'f': case 'F':
        reader.putChar(true);
        tk = TokenKind.FLOATLITERAL;
        break;
    case 'd': case 'D':
        reader.putChar(true);
        tk = TokenKind.DOUBLELITERAL;
        break;
    default:
        // No suffix: the literal is a double.
        tk = TokenKind.DOUBLELITERAL;
    }
    radix = 16;
}
/** Read the fractional digits and the optional decimal exponent of a
 *  floating point number. If an exponent marker is not followed by any
 *  digit, a malformed-literal error is reported and reader.sp is reset
 *  to the value it had before the exponent marker was copied.
 *  @param pos position used for error reporting and passed to reader.digit
 */
private void scanFraction(int pos) {
    skipIllegalUnderscores();
    if (reader.digit(pos, 10) >= 0) {
        scanDigits(pos, 10);
    }

    final int beforeExponent = reader.sp;
    if (reader.ch != 'e' && reader.ch != 'E') {
        return;
    }

    reader.putChar(true);
    skipIllegalUnderscores();
    if (reader.ch == '+' || reader.ch == '-') {
        reader.putChar(true);
    }
    skipIllegalUnderscores();
    if (reader.digit(pos, 10) >= 0) {
        scanDigits(pos, 10);
    } else {
        lexError(pos, Errors.MalformedFpLit);
        reader.sp = beforeExponent;
    }
}
/** Read the fractional part, the exponent and the optional 'd' or 'f'
 *  suffix of a decimal floating point number. Sets the radix to 10 and
 *  the token kind to FLOATLITERAL for an 'f' suffix, DOUBLELITERAL otherwise.
 *  @param pos position used for error reporting and passed to reader.digit
 */
private void scanFractionAndSuffix(int pos) {
    radix = 10;
    scanFraction(pos);
    switch (reader.ch) {
    case 'f': case 'F':
        reader.putChar(true);
        tk = TokenKind.FLOATLITERAL;
        break;
    case 'd': case 'D':
        reader.putChar(true);
        tk = TokenKind.DOUBLELITERAL;
        break;
    default:
        // No suffix: the literal is a double.
        tk = TokenKind.DOUBLELITERAL;
    }
}
/** Read the part of a hexadecimal floating point number that starts at
 *  the '.', followed by its exponent and suffix. The current character
 *  must be '.'. If no hexadecimal digit is seen on either side of the
 *  point, an invalid-hex-number error is reported instead.
 *  @param pos position used for error reporting and passed to reader.digit
 *  @param seendigit whether a digit was already seen before the '.'
 */
private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
    radix = 16;
    Assert.check(reader.ch == '.');
    reader.putChar(true);
    skipIllegalUnderscores();

    boolean anyDigit = seendigit;
    if (reader.digit(pos, 16) >= 0) {
        anyDigit = true;
        scanDigits(pos, 16);
    }

    if (anyDigit) {
        scanHexExponentAndSuffix(pos);
    } else {
        lexError(pos, Errors.InvalidHexNumber);
    }
}
/** If the current character is an underscore, report an
 *  illegal-underscore error at the current position and skip over the
 *  whole run of underscores. Does nothing otherwise.
 */
private void skipIllegalUnderscores() {
    if (reader.ch != '_') {
        return;
    }
    lexError(reader.bp, Errors.IllegalUnderscore);
    while (reader.ch == '_') {
        reader.scanChar();
    }
}
/** Read a number: an integer literal, or a floating point literal when a
 *  fraction, exponent or floating point suffix follows the digits.
 *  @param pos position used for error reporting and passed to reader.digit
 *  @param radix The radix of the number; one of 2, 8, 10, 16.
 */
private void scanNumber(int pos, int radix) {
    this.radix = radix;
    // for octal, allow base-10 digit in case it's a float literal
    final int digitRadix = (radix == 8) ? 10 : radix;
    final int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
    final boolean seendigit = firstDigit >= 0;
    final boolean seenValidDigit = seendigit && firstDigit < digitRadix;
    if (seendigit) {
        scanDigits(pos, digitRadix);
    }

    final boolean hex = (radix == 16);
    final boolean decimalDigits = (digitRadix == 10);

    if (hex && reader.ch == '.') {
        scanHexFractionAndSuffix(pos, seendigit);
        return;
    }
    if (hex && seendigit && (reader.ch == 'p' || reader.ch == 'P')) {
        scanHexExponentAndSuffix(pos);
        return;
    }
    if (decimalDigits && reader.ch == '.') {
        reader.putChar(true);
        scanFractionAndSuffix(pos);
        return;
    }
    if (decimalDigits &&
        (reader.ch == 'e' || reader.ch == 'E' ||
         reader.ch == 'f' || reader.ch == 'F' ||
         reader.ch == 'd' || reader.ch == 'D')) {
        scanFractionAndSuffix(pos);
        return;
    }

    // Integer literal.
    if (!seenValidDigit) {
        if (radix == 2) {
            lexError(pos, Errors.InvalidBinaryNumber);
        } else if (radix == 16) {
            lexError(pos, Errors.InvalidHexNumber);
        }
    }
    if (reader.ch == 'l' || reader.ch == 'L') {
        // The long suffix is skipped, not copied.
        reader.scanChar();
        tk = TokenKind.LONGLITERAL;
    } else {
        tk = TokenKind.INTLITERAL;
    }
}
/** Read an identifier. The current character is taken as the first
 *  character of the identifier; following characters are consumed for as
 *  long as they are identifier parts. Ignorable characters are skipped
 *  without being copied. On return, name holds the scanned name and tk
 *  the token kind that the token factory associates with it.
 */
private void scanIdent() {
    boolean isJavaIdentifierPart;
    reader.putChar(true);
    do {
        switch (reader.ch) {
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z':
        case '$': case '_':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            // Ordinary ASCII identifier part: copied by the putChar after the switch.
            break;
        case '\u0000': case '\u0001': case '\u0002': case '\u0003':
        case '\u0004': case '\u0005': case '\u0006': case '\u0007':
        case '\u0008': case '\u000E': case '\u000F': case '\u0010':
        case '\u0011': case '\u0012': case '\u0013': case '\u0014':
        case '\u0015': case '\u0016': case '\u0017':
        case '\u0018': case '\u0019': case '\u001B':
        case '\u007F':
            // ASCII control characters: skipped without being copied.
            reader.scanChar();
            continue;
        case '\u001A': // EOI is also a legal identifier part
            if (reader.bp >= reader.buflen) {
                // Genuine end of input: the identifier ends here.
                name = reader.name();
                tk = tokens.lookupKind(name);
                return;
            }
            reader.scanChar();
            continue;
        default:
            if (reader.ch < '\u0080') {
                // all ASCII range chars already handled, above
                isJavaIdentifierPart = false;
            } else {
                if (Character.isIdentifierIgnorable(reader.ch)) {
                    reader.scanChar();
                    continue;
                } else {
                    int codePoint = reader.peekSurrogates();
                    if (codePoint >= 0) {
                        // Surrogate pair: copy the first half here; the putChar
                        // after the switch copies the second half.
                        isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint);
                        if (isJavaIdentifierPart) {
                            reader.putChar(true);
                        }
                    } else {
                        isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
                    }
                }
            }
            if (!isJavaIdentifierPart) {
                name = reader.name();
                tk = tokens.lookupKind(name);
                return;
            }
        }
        reader.putChar(true);
    } while (true);
}
/** Return true if the given character can be part of an operator.
 *  @param ch the character to test
 */
private boolean isSpecial(char ch) {
    // Exactly the characters that may appear in an operator token.
    final String operatorChars = "!%&*?+-:<=>^|~@";
    return operatorChars.indexOf(ch) >= 0;
}
/** Read the longest possible sequence of special characters that forms a
 *  known operator and set the token kind accordingly. Characters are
 *  added one at a time; as soon as the accumulated name is no longer
 *  recognized (the lookup yields IDENTIFIER), the last character is
 *  dropped again and scanning stops.
 */
private void scanOperator() {
    do {
        reader.putChar(false);
        Name candidate = reader.name();
        TokenKind candidateKind = tokens.lookupKind(candidate);
        if (candidateKind == TokenKind.IDENTIFIER) {
            // Not an operator any more: take back the character just added.
            reader.sp--;
            break;
        }
        tk = candidateKind;
        reader.scanChar();
    } while (isSpecial(reader.ch));
}
/** Read token.
*/
public Token readToken() {
reader.sp = 0;
name = null;
radix = 0;
int pos = 0;
int endPos = 0;
List