open Udiff src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java

src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java

rev 60227 : 8224225: Tokenizer improvements
Reviewed-by: jlaskey

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this

@@ -28,124 +28,151 @@
 import com.sun.tools.javac.code.Lint;
 import com.sun.tools.javac.code.Lint.LintCategory;
 import com.sun.tools.javac.code.Preview;
 import com.sun.tools.javac.code.Source;
 import com.sun.tools.javac.code.Source.Feature;
+import com.sun.tools.javac.file.JavacFileManager;
 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
 import com.sun.tools.javac.resources.CompilerProperties.Errors;
 import com.sun.tools.javac.resources.CompilerProperties.Warnings;
 import com.sun.tools.javac.util.*;
 import com.sun.tools.javac.util.JCDiagnostic.*;
 
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
 import java.nio.CharBuffer;
-import java.util.HashSet;
 import java.util.Set;
+import java.util.regex.Pattern;
 
 import static com.sun.tools.javac.parser.Tokens.*;
-import static com.sun.tools.javac.util.LayoutCharacters.*;
+import static com.sun.tools.javac.util.LayoutCharacters.EOI;
 
-/** The lexical analyzer maps an input stream consisting of
- *  ASCII characters and Unicode escapes into a token sequence.
+/**
+ * The lexical analyzer maps an input stream consisting of UTF-8 characters and unicode
+ * escape sequences into a token sequence.
  *
  *  <p><b>This is NOT part of any supported API.
  *  If you write code that depends on this, you do so at your own risk.
  *  This code and its internal interfaces are subject to change or
  *  deletion without notice.</b>
  */
-public class JavaTokenizer {
-
+public class JavaTokenizer extends UnicodeReader {
+    /**
+     * If true then prints token information after each nextToken().
+     */
     private static final boolean scannerDebug = false;
 
-    /** The source language setting.
+    /**
+     * Sentinel for non-value.
+     */
+    private int NOT_FOUND = -1;
+
+    /**
+     * The source language setting. Copied from scanner factory.
      */
     private Source source;
 
-    /** The preview language setting. */
+    /**
+     * The preview language setting. Copied from scanner factory.
+     */
     private Preview preview;
 
-    /** The log to be used for error reporting.
+    /**
+     * The log to be used for error reporting. Copied from scanner factory.
      */
     private final Log log;
 
-    /** The token factory. */
+    /**
+     * The token factory. Copied from scanner factory.
+     */
     private final Tokens tokens;
 
-    /** The token kind, set by nextToken().
+    /**
+     * The names factory. Copied from scanner factory.
+     */
+    private final Names names;
+
+    /**
+     * The token kind, set by nextToken().
      */
     protected TokenKind tk;
 
-    /** The token's radix, set by nextToken().
+    /**
+     * The token's radix, set by nextToken().
      */
     protected int radix;
 
-    /** The token's name, set by nextToken().
+    /**
+     * The token's name, set by nextToken().
      */
     protected Name name;
 
-    /** The position where a lexical error occurred;
+    /**
+     * The position where a lexical error occurred.
      */
     protected int errPos = Position.NOPOS;
 
-    /** The Unicode reader (low-level stream reader).
-     */
-    protected UnicodeReader reader;
-
-    /** If is a text block
+    /**
+     * true if the token is a text block, set by nextToken().
      */
     protected boolean isTextBlock;
 
-    /** If contains escape sequences
+    /**
+     * true if the token contains escape sequences, set by nextToken().
      */
     protected boolean hasEscapeSequences;
 
+    /**
+     * Buffer for building literals, used by nextToken().
+     */
+    protected StringBuilder sb;
+
+    /**
+     * Origin scanner factory.
+     */
     protected ScannerFactory fac;
 
-    // The set of lint options currently in effect. It is initialized
-    // from the context, and then is set/reset as needed by Attr as it
-    // visits all the various parts of the trees during attribution.
+    /**
+     * The set of lint options currently in effect. It is initialized
+     * from the context, and then is set/reset as needed by Attr as it
+     * visits all the various parts of the trees during attribution.
+     */
     protected Lint lint;
 
-    private static final boolean hexFloatsWork = hexFloatsWork();
-    private static boolean hexFloatsWork() {
-        try {
-            Float.valueOf("0x1.0p1");
-            return true;
-        } catch (NumberFormatException ex) {
-            return false;
-        }
+    /**
+     * Construct a Java token scanner from the input character buffer.
+     *
+     * @param fac  the factory which created this Scanner.
+     * @param cb   the input character buffer.
+     */
+    protected JavaTokenizer(ScannerFactory fac, CharBuffer cb) {
+        this(fac, JavacFileManager.toArray(cb), cb.limit());
     }
 
     /**
-     * Create a scanner from the input array.  This method might
-     * modify the array.  To avoid copying the input array, ensure
-     * that {@code inputLength < input.length} or
-     * {@code input[input.length -1]} is a white space character.
+     * Construct a Java token scanner from the input character array.
      *
      * @param fac the factory which created this Scanner
-     * @param buf the input, might be modified
-     * Must be positive and less than or equal to input.length.
+     * @param array   the input character array.
+     * @param length  The length of the meaningful content in the array.
      */
-    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
-        this(fac, new UnicodeReader(fac, buf));
-    }
-
-    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
-        this(fac, new UnicodeReader(fac, buf, inputLength));
-    }
-
-    protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
+    protected JavaTokenizer(ScannerFactory fac, char[] array, int length) {
+        super(fac, array, length);
         this.fac = fac;
         this.log = fac.log;
+        this.names = fac.names;
         this.tokens = fac.tokens;
         this.source = fac.source;
         this.preview = fac.preview;
-        this.reader = reader;
         this.lint = fac.lint;
+        this.sb = new StringBuilder(256);
     }
 
+    /**
+     * Check the source level for a lexical feature.
+     *
+     * @param pos      position in input buffer.
+     * @param feature  feature to verify.
+     */
     protected void checkSourceLevel(int pos, Feature feature) {
         if (preview.isPreview(feature) && !preview.isEnabled()) {
             //preview feature without --preview flag, error
             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
         } else if (!feature.allowedInSource(source)) {

@@ -155,486 +182,452 @@
             //use of preview feature, warn
             preview.warnPreview(pos, feature);
         }
     }
 
-    /** Report an error at the given position using the provided arguments.
+    /**
+     * Report an error at the given position using the provided arguments.
+     *
+     * @param pos  position in input buffer.
+     * @param key  error key to report.
      */
     protected void lexError(int pos, JCDiagnostic.Error key) {
         log.error(pos, key);
         tk = TokenKind.ERROR;
         errPos = pos;
     }
 
+    /**
+     * Report an error at the given position using the provided arguments.
+     *
+     * @param flags  diagnostic flags.
+     * @param pos    position in input buffer.
+     * @param key    error key to report.
+     */
     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
         log.error(flags, pos, key);
         tk = TokenKind.ERROR;
         errPos = pos;
     }
 
+    /**
+     * Report a lint warning at the given position using the provided arguments.
+     *
+     * @param lc     lint category.
+     * @param pos    position in input buffer.
+     * @param key    warning key to report.
+     */
     protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
         DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
         log.warning(lc, dp, key);
     }
 
-    /** Read next character in character or string literal and copy into sbuf.
-     *      pos - start of literal offset
-     *      translateEscapesNow - true if String::translateEscapes is not available
-     *                            in the java.base libs. Occurs during bootstrapping.
-     *      multiline - true if scanning a text block. Allows newlines to be embedded
-     *                  in the result.
-     */
-    private void scanLitChar(int pos, boolean translateEscapesNow, boolean multiline) {
-         if (reader.ch == '\\') {
-            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
-                reader.skipChar();
-                if (!translateEscapesNow) {
-                    reader.putChar(false);
-                }
-                reader.putChar(true);
-            } else {
-                reader.nextChar(translateEscapesNow);
-                switch (reader.ch) {
-                case '0': case '1': case '2': case '3':
-                case '4': case '5': case '6': case '7':
-                    char leadch = reader.ch;
-                    int oct = reader.digit(pos, 8);
-                    reader.nextChar(translateEscapesNow);
-                    if ('0' <= reader.ch && reader.ch <= '7') {
-                        oct = oct * 8 + reader.digit(pos, 8);
-                        reader.nextChar(translateEscapesNow);
-                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
-                            oct = oct * 8 + reader.digit(pos, 8);
-                            reader.nextChar(translateEscapesNow);
-                        }
+    /**
+     * Add a character to the literal buffer.
+     *
+     * @param ch  character to add.
+     */
+    protected void put(char ch) {
+        sb.append(ch);
                     }
-                    if (translateEscapesNow) {
-                        reader.putChar((char)oct);
+
+    /**
+     * Add a codepoint to the literal buffer.
+     *
+     * @param codePoint  codepoint to add.
+     */
+    protected void putCodePoint(int codePoint) {
+        sb.appendCodePoint(codePoint);
                     }
-                    break;
-                case 'b':
-                    reader.putChar(translateEscapesNow ? '\b' : 'b', true); break;
-                case 't':
-                    reader.putChar(translateEscapesNow ? '\t' : 't', true); break;
-                case 'n':
-                    reader.putChar(translateEscapesNow ? '\n' : 'n', true); break;
-                case 'f':
-                    reader.putChar(translateEscapesNow ? '\f' : 'f', true); break;
-                case 'r':
-                    reader.putChar(translateEscapesNow ? '\r' : 'r', true); break;
-                case '\'':
-                case '\"':
-                case '\\':
-                    reader.putChar(true); break;
-                case 's':
-                    checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
-                    reader.putChar(translateEscapesNow ? ' ' : 's', true); break;
-                case '\n':
-                case '\r':
-                    if (!multiline) {
-                        lexError(reader.bp, Errors.IllegalEscChar);
+
+    /**
+     * Add current character or codepoint to the literal buffer.
+     */
+    protected void put() {
+        if (isSurrogate()) {
+            putCodePoint(getCodepoint());
                     } else {
-                        checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
-                        int start = reader.bp;
-                        if (reader.ch == '\r' && reader.peekChar() == '\n') {
-                           reader.nextChar(translateEscapesNow);
-                        }
-                        reader.nextChar(translateEscapesNow);
-                        processLineTerminator(start, reader.bp);
-                    }
-                    break;
-                default:
-                    lexError(reader.bp, Errors.IllegalEscChar);
-                }
-            }
-        } else if (reader.bp != reader.buflen) {
-            reader.putChar(true);
+            put(get());
         }
     }
 
-    /** Interim access to String methods used to support text blocks.
-     *  Required to handle bootstrapping with pre-text block jdks.
-     *  Should be replaced with direct calls in the 'next' jdk.
-     */
-    static class TextBlockSupport {
-        /** Reflection method to remove incidental indentation.
+    /**
+     * Add a string to the literal buffer.
          */
-        private static final Method stripIndent;
+    protected void put(String string) {
+        sb.append(string);
+    }
 
-        /** Reflection method to translate escape sequences.
+    /**
+     * Add current character or codepoint to the literal buffer then return next character.
          */
-        private static final Method translateEscapes;
+    protected char putThenNext() {
+        put();
 
-        /** true if stripIndent and translateEscapes are available in the bootstrap jdk.
-         */
-        private static final boolean hasSupport;
+        return next();
+    }
 
-        /** Get a string method via refection or null if not available.
+    /**
+     * If the specified character ch matches the current character then add current character
+     * to the literal buffer and then advance.
+     *
+     * @param ch  character to match.
+     *
+     * @return true if ch matches current character.
          */
-        private static Method getStringMethodOrNull(String name) {
-            try {
-                return String.class.getMethod(name);
-            } catch (Exception ex) {
-                // Method not available, return null.
-            }
-            return null;
-        }
+    protected boolean acceptThenPut(char ch) {
+        if (is(ch)) {
+            put(get());
+            next();
 
-        static {
-            // Get text block string methods.
-            stripIndent = getStringMethodOrNull("stripIndent");
-            translateEscapes = getStringMethodOrNull("translateEscapes");
-            // true if stripIndent and translateEscapes are available in the bootstrap jdk.
-            hasSupport = stripIndent != null && translateEscapes != null;
-        }
-
-        /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
-         */
-        static boolean hasSupport() {
-            return hasSupport;
-        }
-
-        /** Return the leading whitespace count (indentation) of the line.
-         */
-        private static int indent(String line) {
-            return line.length() - line.stripLeading().length();
-        }
-
-        enum WhitespaceChecks {
-            INCONSISTENT,
-            TRAILING
-        };
-
-        /** Check that the use of white space in content is not problematic.
-         */
-        static Set<WhitespaceChecks> checkWhitespace(String string) {
-            // Start with empty result set.
-            Set<WhitespaceChecks> checks = new HashSet<>();
-            // No need to check empty strings.
-            if (string.isEmpty()) {
-                return checks;
-            }
-            // Maximum common indentation.
-            int outdent = 0;
-            // No need to check indentation if opting out (last line is empty.)
-            char lastChar = string.charAt(string.length() - 1);
-            boolean optOut = lastChar == '\n' || lastChar == '\r';
-            // Split string based at line terminators.
-            String[] lines = string.split("\\R");
-            int length = lines.length;
-            // Extract last line.
-            String lastLine = length == 0 ? "" : lines[length - 1];
-             if (!optOut) {
-                // Prime with the last line indentation (may be blank.)
-                outdent = indent(lastLine);
-                for (String line : lines) {
-                    // Blanks lines have no influence (last line accounted for.)
-                    if (!line.isBlank()) {
-                        outdent = Integer.min(outdent, indent(line));
-                        if (outdent == 0) {
-                            break;
-                        }
-                    }
-                }
-            }
-            // Last line is representative.
-            String start = lastLine.substring(0, outdent);
-            for (String line : lines) {
-                // Fail if a line does not have the same indentation.
-                if (!line.isBlank() && !line.startsWith(start)) {
-                    // Mix of different white space
-                    checks.add(WhitespaceChecks.INCONSISTENT);
-                }
-                // Line has content even after indent is removed.
-                if (outdent < line.length()) {
-                    // Is the last character a white space.
-                    lastChar = line.charAt(line.length() - 1);
-                    if (Character.isWhitespace(lastChar)) {
-                        // Has trailing white space.
-                        checks.add(WhitespaceChecks.TRAILING);
-                    }
-                }
-            }
-            return checks;
+            return true;
         }
 
-        /** Invoke String::stripIndent through reflection.
-         */
-        static String stripIndent(String string) {
-            try {
-                string = (String)stripIndent.invoke(string);
-            } catch (InvocationTargetException | IllegalAccessException ex) {
-                throw new RuntimeException(ex);
-            }
-            return string;
+        return false;
         }
 
-        /** Invoke String::translateEscapes through reflection.
+    /**
+     * If either ch1 or ch2 matches the current character then add current character
+     * to the literal buffer and then advance.
+     *
+     * @param ch1  first character to match.
+     * @param ch2  second character to match.
+     *
+     * @return true if either ch1 or ch2 matches current character.
          */
-        static String translateEscapes(String string) {
-            try {
-                string = (String)translateEscapes.invoke(string);
-            } catch (InvocationTargetException | IllegalAccessException ex) {
-                throw new RuntimeException(ex);
-            }
-            return string;
+    protected boolean acceptOneOfThenPut(char ch1, char ch2) {
+        if (isOneOf(ch1, ch2)) {
+            put(get());
+            next();
+
+            return true;
         }
+
+        return false;
     }
 
-    /** Test for EOLN.
+    /**
+     * Test if the current character is a line terminator.
+     *
+     * @return true if current character is a line terminator.
      */
     private boolean isEOLN() {
-        return reader.ch == LF || reader.ch == CR;
+        return isOneOf('\n', '\r');
     }
 
-    /** Test for CRLF.
+    /**
+     * Skip and process a line terminator sequence.
      */
-    private boolean isCRLF() {
-        return reader.ch == CR && reader.peekChar() == LF;
+    private void skipLineTerminator() {
+        int start = position();
+        accept('\r');
+        accept('\n');
+        processLineTerminator(start, position());
     }
 
-    /** Count and skip repeated occurrences of the specified character.
+    /**
+     * Processes the current character and places in the literal buffer. If the current
+     * character is a backslash then the next character is validated as a proper
+     * escape character. Conversion of escape sequences takes place at end of nextToken().
+     *
+     * @param pos position of the first character in literal.
      */
-    private int countChar(char ch, int max) {
-        int count = 0;
-        for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
-            reader.scanChar();
+    private void scanLitChar(int pos) {
+        if (acceptThenPut('\\')) {
+            hasEscapeSequences = true;
+
+            switch (get()) {
+                case '0': case '1': case '2': case '3':
+                case '4': case '5': case '6': case '7':
+                    char leadch = get();
+                    putThenNext();
+
+                    if (inRange('0', '7')) {
+                        putThenNext();
+
+                        if (leadch <= '3' && inRange('0', '7')) {
+                            putThenNext();
         }
-        return count;
     }
+                    break;
 
-    /** Skip and process a line terminator.
-     */
-    private void skipLineTerminator() {
-        int start = reader.bp;
-        if (isCRLF()) {
-            reader.scanChar();
+                case 'b':
+                case 't':
+                case 'n':
+                case 'f':
+                case 'r':
+                case '\'':
+                case '\"':
+                case '\\':
+                    putThenNext();
+                    break;
+
+                case 's':
+                    checkSourceLevel(position(), Feature.TEXT_BLOCKS);
+                    putThenNext();
+                    break;
+
+                case '\n':
+                case '\r':
+                    if (isTextBlock) {
+                        skipLineTerminator();
+                        // Normalize line terminator.
+                        put('\n');
+                    } else {
+                        lexError(position(), Errors.IllegalEscChar);
+                    }
+                    break;
+
+                default:
+                    lexError(position(), Errors.IllegalEscChar);
+                    break;
+            }
+        } else {
+            putThenNext();
         }
-        reader.scanChar();
-        processLineTerminator(start, reader.bp);
     }
 
-    /** Scan a string literal or text block.
+    /**
+     * Scan a string literal or text block.
+     *
+     * @param pos  position of the first character in literal.
      */
     private void scanString(int pos) {
-        // Clear flags.
-        isTextBlock = false;
-        hasEscapeSequences = false;
-        // Track the end of first line for error recovery.
-        int firstEOLN = -1;
-        // Attempt to scan for up to 3 double quotes.
-        int openCount = countChar('\"', 3);
-        switch (openCount) {
-        case 1: // Starting a string literal.
-            break;
-        case 2: // Starting an empty string literal.
+        // Assume the best.
             tk = Tokens.TokenKind.STRINGLITERAL;
-            return;
-        case 3: // Starting a text block.
+        // Track the end of first line for error recovery.
+        int firstEOLN = NOT_FOUND;
+        // Check for text block delimiter.
+        isTextBlock = accept("\"\"\"");
+
+        if (isTextBlock) {
             // Check if preview feature is enabled for text blocks.
             checkSourceLevel(pos, Feature.TEXT_BLOCKS);
-            isTextBlock = true;
+
             // Verify the open delimiter sequence.
-            while (reader.bp < reader.buflen) {
-                char ch = reader.ch;
-                if (ch != ' ' && ch != '\t' && ch != FF) {
-                    break;
+            // Error if the open delimiter sequence is not """<white space>*<LineTerminator>.
+            skipWhitespace();
+
+            if (isEOLN()) {
+                skipLineTerminator();
+            } else {
+                lexError(position(), Errors.IllegalTextBlockOpen);
+                return;
                 }
-                reader.scanChar();
+
+            // While characters are available.
+            while (!isEOF()) {
+                if (accept("\"\"\"")) {
+                    return;
             }
+
             if (isEOLN()) {
                 skipLineTerminator();
+                    // Add normalized line terminator to literal buffer.
+                    put('\n');
+
+                    // Record first line terminator for error recovery.
+                    if (firstEOLN == NOT_FOUND) {
+                        firstEOLN = position();
+                    }
             } else {
-                // Error if the open delimiter sequence is not
-                //     """<white space>*<LineTerminator>.
-                lexError(reader.bp, Errors.IllegalTextBlockOpen);
-                return;
+                    // Add character to string buffer.
+                    scanLitChar(pos);
             }
-            break;
         }
+        } else {
+            // Skip first quote.
+            next();
+
         // While characters are available.
-        while (reader.bp < reader.buflen) {
-            // If possible close delimiter sequence.
-            if (reader.ch == '\"') {
-                // Check to see if enough double quotes are present.
-                int closeCount = countChar('\"', openCount);
-                if (openCount == closeCount) {
-                    // Good result.
-                    tk = Tokens.TokenKind.STRINGLITERAL;
+            while (!isEOF()) {
+                if (accept('\"')) {
                     return;
                 }
-                // False alarm, add double quotes to string buffer.
-                reader.repeat('\"', closeCount);
-            } else if (isEOLN()) {
+
+                if (isEOLN()) {
                 // Line terminator in string literal is an error.
                 // Fall out to unclosed string literal error.
-                if (openCount == 1) {
                     break;
-                }
-                skipLineTerminator();
-                // Add line terminator to string buffer.
-                reader.putChar('\n', false);
-                // Record first line terminator for error recovery.
-                if (firstEOLN == -1) {
-                    firstEOLN = reader.bp;
-                }
-            } else if (reader.ch == '\\') {
-                // Handle escape sequences.
-                hasEscapeSequences = true;
-                // Translate escapes immediately if TextBlockSupport is not available
-                // during bootstrapping.
-                boolean translateEscapesNow = !TextBlockSupport.hasSupport();
-                scanLitChar(pos, translateEscapesNow, openCount != 1);
             } else {
                 // Add character to string buffer.
-                reader.putChar(true);
+                    scanLitChar(pos);
+                }
             }
         }
+
         // String ended without close delimiter sequence.
-        lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
-        if (firstEOLN  != -1) {
-            // Reset recovery position to point after open delimiter sequence.
-            reader.reset(firstEOLN);
+        lexError(pos, isTextBlock ? Errors.UnclosedTextBlock : Errors.UnclosedStrLit);
+
+        if (firstEOLN  != NOT_FOUND) {
+            // Reset recovery position to point after text block open delimiter sequence.
+            reset(firstEOLN);
         }
     }
 
+    /**
+     * Scan sequence of digits.
+     *
+     * @param pos         position of the first character in literal.
+     * @param digitRadix  radix of numeric literal.
+     */
     private void scanDigits(int pos, int digitRadix) {
-        char saveCh;
-        int savePos;
+        int leadingUnderscorePos = is('_') ? position() : NOT_FOUND;
+        int trailingUnderscorePos;
+
         do {
-            if (reader.ch != '_') {
-                reader.putChar(false);
+            if (!is('_')) {
+                put();
+                trailingUnderscorePos = NOT_FOUND;
+            } else {
+                trailingUnderscorePos = position();
+            }
+
+            next();
+        } while (digit(pos, digitRadix) >= 0 || is('_'));
+
+        if (leadingUnderscorePos != NOT_FOUND) {
+            lexError(leadingUnderscorePos, Errors.IllegalUnderscore);
+        } else if (trailingUnderscorePos != NOT_FOUND) {
+            lexError(trailingUnderscorePos, Errors.IllegalUnderscore);
             }
-            saveCh = reader.ch;
-            savePos = reader.bp;
-            reader.scanChar();
-        } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
-        if (saveCh == '_')
-            lexError(savePos, Errors.IllegalUnderscore);
     }
 
-    /** Read fractional part of hexadecimal floating point number.
+    /**
+     * Read exponent and suffix of hexadecimal floating point number.
+     *
+     * @param pos  position of the first character in literal.
      */
     private void scanHexExponentAndSuffix(int pos) {
-        if (reader.ch == 'p' || reader.ch == 'P') {
-            reader.putChar(true);
+        if (acceptOneOfThenPut('p', 'P')) {
             skipIllegalUnderscores();
-            if (reader.ch == '+' || reader.ch == '-') {
-                reader.putChar(true);
-            }
+            acceptOneOfThenPut('+', '-');
             skipIllegalUnderscores();
-            if (reader.digit(pos, 10) >= 0) {
+
+            if (digit(pos, 10) >= 0) {
                 scanDigits(pos, 10);
-                if (!hexFloatsWork)
-                    lexError(pos, Errors.UnsupportedCrossFpLit);
-            } else
+            } else {
                 lexError(pos, Errors.MalformedFpLit);
+            }
         } else {
             lexError(pos, Errors.MalformedFpLit);
         }
-        if (reader.ch == 'f' || reader.ch == 'F') {
-            reader.putChar(true);
+
+        if (acceptOneOfThenPut('f', 'F')) {
             tk = TokenKind.FLOATLITERAL;
             radix = 16;
         } else {
-            if (reader.ch == 'd' || reader.ch == 'D') {
-                reader.putChar(true);
-            }
+            acceptOneOfThenPut('d', 'D');
             tk = TokenKind.DOUBLELITERAL;
             radix = 16;
         }
     }
 
-    /** Read fractional part of floating point number.
+    /**
+     * Read fractional part of floating point number.
+     *
+     * @param pos  position of the first character in literal.
      */
     private void scanFraction(int pos) {
         skipIllegalUnderscores();
-        if (reader.digit(pos, 10) >= 0) {
+
+        if (digit(pos, 10) >= 0) {
             scanDigits(pos, 10);
         }
-        int sp1 = reader.sp;
-        if (reader.ch == 'e' || reader.ch == 'E') {
-            reader.putChar(true);
+
+        int index = sb.length();
+
+        if (acceptOneOfThenPut('e', 'E')) {
             skipIllegalUnderscores();
-            if (reader.ch == '+' || reader.ch == '-') {
-                reader.putChar(true);
-            }
+            acceptOneOfThenPut('+', '-');
             skipIllegalUnderscores();
-            if (reader.digit(pos, 10) >= 0) {
+
+            if (digit(pos, 10) >= 0) {
                 scanDigits(pos, 10);
                 return;
             }
+
             lexError(pos, Errors.MalformedFpLit);
-            reader.sp = sp1;
+            sb.setLength(index);
         }
     }
 
-    /** Read fractional part and 'd' or 'f' suffix of floating point number.
+    /**
+     * Read fractional part and 'd' or 'f' suffix of floating point number.
+     *
+     * @param pos  position of the first character in literal.
      */
     private void scanFractionAndSuffix(int pos) {
         radix = 10;
         scanFraction(pos);
-        if (reader.ch == 'f' || reader.ch == 'F') {
-            reader.putChar(true);
+
+        if (acceptOneOfThenPut('f', 'F')) {
             tk = TokenKind.FLOATLITERAL;
         } else {
-            if (reader.ch == 'd' || reader.ch == 'D') {
-                reader.putChar(true);
-            }
+            acceptOneOfThenPut('d', 'D');
             tk = TokenKind.DOUBLELITERAL;
         }
     }
 
-    /** Read fractional part and 'd' or 'f' suffix of floating point number.
+    /**
+     * Read fractional part and 'd' or 'f' suffix of hexadecimal floating point number.
+     *
+     * @param pos  position of the first character in literal.
      */
     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
         radix = 16;
-        Assert.check(reader.ch == '.');
-        reader.putChar(true);
+        Assert.check(is('.'));
+        putThenNext();
         skipIllegalUnderscores();
-        if (reader.digit(pos, 16) >= 0) {
+
+        if (digit(pos, 16) >= 0) {
             seendigit = true;
             scanDigits(pos, 16);
         }
+
         if (!seendigit)
             lexError(pos, Errors.InvalidHexNumber);
         else
             scanHexExponentAndSuffix(pos);
     }
 
+    /**
+     * Skip over underscores and report as an error if found.
+     */
     private void skipIllegalUnderscores() {
-        if (reader.ch == '_') {
-            lexError(reader.bp, Errors.IllegalUnderscore);
-            while (reader.ch == '_')
-                reader.scanChar();
+        if (is('_')) {
+            lexError(position(), Errors.IllegalUnderscore);
+            skip('_');
         }
     }
 
-    /** Read a number.
-     *  @param radix  The radix of the number; one of 2, 8, 10, 16.
+    /**
+     * Read a number. (Spec. 3.10)
+     *
+     * @param pos    position of the first character in literal.
+     * @param radix  the radix of the number; one of 2, 8, 10, 16.
      */
     private void scanNumber(int pos, int radix) {
         // for octal, allow base-10 digit in case it's a float literal
         this.radix = radix;
         int digitRadix = (radix == 8 ? 10 : radix);
-        int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
+        int firstDigit = digit(pos, Math.max(10, digitRadix));
         boolean seendigit = firstDigit >= 0;
         boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
+
         if (seendigit) {
             scanDigits(pos, digitRadix);
         }
-        if (radix == 16 && reader.ch == '.') {
+
+        if (radix == 16 && is('.')) {
             scanHexFractionAndSuffix(pos, seendigit);
-        } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
+        } else if (seendigit && radix == 16 && isOneOf('p', 'P')) {
             scanHexExponentAndSuffix(pos);
-        } else if (digitRadix == 10 && reader.ch == '.') {
-            reader.putChar(true);
+        } else if (digitRadix == 10 && is('.')) {
+            putThenNext();
             scanFractionAndSuffix(pos);
-        } else if (digitRadix == 10 &&
-                   (reader.ch == 'e' || reader.ch == 'E' ||
-                    reader.ch == 'f' || reader.ch == 'F' ||
-                    reader.ch == 'd' || reader.ch == 'D')) {
+        } else if (digitRadix == 10 && isOneOf('e', 'E', 'f', 'F', 'd', 'D')) {
             scanFractionAndSuffix(pos);
         } else {
             if (!seenValidDigit) {
                 switch (radix) {
                 case 2:

@@ -643,27 +636,35 @@
                 case 16:
                     lexError(pos, Errors.InvalidHexNumber);
                     break;
                 }
             }
-            if (reader.ch == 'l' || reader.ch == 'L') {
-                reader.scanChar();
+
+            if (acceptOneOf('l', 'L')) {
                 tk = TokenKind.LONGLITERAL;
             } else {
                 tk = TokenKind.INTLITERAL;
             }
         }
     }
 
-    /** Read an identifier.
+    /**
+     * Converts the literal buffer to a name and looks up its token kind (keyword, operator, or identifier.)
+     */
+    private void checkIdent() {
+        name = names.fromString(sb.toString());
+        tk = tokens.lookupKind(name);
+    }
+
+    /**
+     * Read an identifier. (Spec. 3.8)
      */
     private void scanIdent() {
-        boolean isJavaIdentifierPart;
-        char high;
-        reader.putChar(true);
+        putThenNext();
+
         do {
-            switch (reader.ch) {
+            switch (get()) {
             case 'A': case 'B': case 'C': case 'D': case 'E':
             case 'F': case 'G': case 'H': case 'I': case 'J':
             case 'K': case 'L': case 'M': case 'N': case 'O':
             case 'P': case 'Q': case 'R': case 'S': case 'T':
             case 'U': case 'V': case 'W': case 'X': case 'Y':

@@ -676,123 +677,135 @@
             case 'z':
             case '$': case '_':
             case '0': case '1': case '2': case '3': case '4':
             case '5': case '6': case '7': case '8': case '9':
                 break;
+
             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
             case '\u0015': case '\u0016': case '\u0017':
             case '\u0018': case '\u0019': case '\u001B':
             case '\u007F':
-                reader.scanChar();
+                next();
                 continue;
+
             case '\u001A': // EOI is also a legal identifier part
-                if (reader.bp >= reader.buflen) {
-                    name = reader.name();
-                    tk = tokens.lookupKind(name);
+                if (isEOF()) {
+                    checkIdent();
                     return;
                 }
-                reader.scanChar();
+
+                next();
                 continue;
+
             default:
-                if (reader.ch < '\u0080') {
+                boolean isJavaIdentifierPart;
+
+                if (isASCII()) {
                     // all ASCII range chars already handled, above
                     isJavaIdentifierPart = false;
                 } else {
-                    if (Character.isIdentifierIgnorable(reader.ch)) {
-                        reader.scanChar();
+                    if (Character.isIdentifierIgnorable(get())) {
+                        next();
                         continue;
-                    } else {
-                        int codePoint = reader.peekSurrogates();
-                        if (codePoint >= 0) {
-                            if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
-                                reader.putChar(true);
-                            }
-                        } else {
-                            isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
-                        }
                     }
+
+                    isJavaIdentifierPart = isSurrogate()
+                            ? Character.isJavaIdentifierPart(getCodepoint())
+                            : Character.isJavaIdentifierPart(get());
                 }
+
                 if (!isJavaIdentifierPart) {
-                    name = reader.name();
-                    tk = tokens.lookupKind(name);
+                    checkIdent();
                     return;
                 }
             }
-            reader.putChar(true);
+
+            putThenNext();
         } while (true);
     }
 
-    /** Return true if reader.ch can be part of an operator.
+    /**
+     * Return true if ch can be part of an operator.
+     *
+     * @param ch  character to check.
+     *
+     * @return true if ch can be part of an operator.
      */
     private boolean isSpecial(char ch) {
         switch (ch) {
         case '!': case '%': case '&': case '*': case '?':
         case '+': case '-': case ':': case '<': case '=':
         case '>': case '^': case '|': case '~':
         case '@':
             return true;
+
         default:
             return false;
         }
     }
 
-    /** Read longest possible sequence of special characters and convert
-     *  to token.
+    /**
+     * Read longest possible sequence of special characters and convert to token.
      */
     private void scanOperator() {
         while (true) {
-            reader.putChar(false);
-            Name newname = reader.name();
-            TokenKind tk1 = tokens.lookupKind(newname);
-            if (tk1 == TokenKind.IDENTIFIER) {
-                reader.sp--;
+            put();
+            TokenKind newtk = tokens.lookupKind(sb.toString());
+
+            if (newtk == TokenKind.IDENTIFIER) {
+                sb.setLength(sb.length() - 1);
+                break;
+            }
+
+            tk = newtk;
+            next();
+
+            if (!isSpecial(get())) {
                 break;
             }
-            tk = tk1;
-            reader.scanChar();
-            if (!isSpecial(reader.ch)) break;
         }
     }
 
-    /** Read token.
+    /**
+     * Read token (main entrypoint.)
      */
     public Token readToken() {
-
-        reader.sp = 0;
+        sb.setLength(0);
         name = null;
         radix = 0;
+        isTextBlock = false;
+        hasEscapeSequences = false;
 
-        int pos = 0;
-        int endPos = 0;
+        int pos;
         List<Comment> comments = null;
 
         try {
             loop: while (true) {
-                pos = reader.bp;
-                switch (reader.ch) {
+                pos = position();
+
+                switch (get()) {
                 case ' ': // (Spec 3.6)
                 case '\t': // (Spec 3.6)
-                case FF: // (Spec 3.6)
-                    do {
-                        reader.scanChar();
-                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
-                    processWhiteSpace(pos, reader.bp);
+                case '\f': // (Spec 3.6)
+                    skipWhitespace();
+                    processWhiteSpace(pos, position());
                     break;
-                case LF: // (Spec 3.4)
-                    reader.scanChar();
-                    processLineTerminator(pos, reader.bp);
+
+                case '\n': // (Spec 3.4)
+                    next();
+                    processLineTerminator(pos, position());
                     break;
-                case CR: // (Spec 3.4)
-                    reader.scanChar();
-                    if (reader.ch == LF) {
-                        reader.scanChar();
-                    }
-                    processLineTerminator(pos, reader.bp);
+
+                case '\r': // (Spec 3.4)
+                    next();
+                    accept('\n');
+                    processLineTerminator(pos, position());
                     break;
+
                 case 'A': case 'B': case 'C': case 'D': case 'E':
                 case 'F': case 'G': case 'H': case 'I': case 'J':
                 case 'K': case 'L': case 'M': case 'N': case 'O':
                 case 'P': case 'Q': case 'R': case 'S': case 'T':
                 case 'U': case 'V': case 'W': case 'X': case 'Y':

@@ -801,196 +814,235 @@
                 case 'f': case 'g': case 'h': case 'i': case 'j':
                 case 'k': case 'l': case 'm': case 'n': case 'o':
                 case 'p': case 'q': case 'r': case 's': case 't':
                 case 'u': case 'v': case 'w': case 'x': case 'y':
                 case 'z':
-                case '$': case '_':
+                case '$': case '_': // (Spec. 3.8)
                     scanIdent();
                     break loop;
-                case '0':
-                    reader.scanChar();
-                    if (reader.ch == 'x' || reader.ch == 'X') {
-                        reader.scanChar();
+
+                case '0': // (Spec. 3.10)
+                    next();
+
+                    if (acceptOneOf('x', 'X')) {
                         skipIllegalUnderscores();
                         scanNumber(pos, 16);
-                    } else if (reader.ch == 'b' || reader.ch == 'B') {
-                        reader.scanChar();
+                    } else if (acceptOneOf('b', 'B')) {
                         skipIllegalUnderscores();
                         scanNumber(pos, 2);
                     } else {
-                        reader.putChar('0');
-                        if (reader.ch == '_') {
-                            int savePos = reader.bp;
-                            do {
-                                reader.scanChar();
-                            } while (reader.ch == '_');
-                            if (reader.digit(pos, 10) < 0) {
+                        put('0');
+
+                        if (is('_')) {
+                            int savePos = position();
+                            skip('_');
+
+                            if (digit(pos, 10) < 0) {
                                 lexError(savePos, Errors.IllegalUnderscore);
                             }
                         }
+
                         scanNumber(pos, 8);
                     }
                     break loop;
+
                 case '1': case '2': case '3': case '4':
-                case '5': case '6': case '7': case '8': case '9':
+                case '5': case '6': case '7': case '8': case '9':  // (Spec. 3.10)
                     scanNumber(pos, 10);
                     break loop;
-                case '.':
-                    reader.scanChar();
-                    if (reader.digit(pos, 10) >= 0) {
-                        reader.putChar('.');
-                        scanFractionAndSuffix(pos);
-                    } else if (reader.ch == '.') {
-                        int savePos = reader.bp;
-                        reader.putChar('.'); reader.putChar('.', true);
-                        if (reader.ch == '.') {
-                            reader.scanChar();
-                            reader.putChar('.');
+
+                case '.': // (Spec. 3.12)
+                    if (accept("...")) {
+                        put("...");
                             tk = TokenKind.ELLIPSIS;
                         } else {
+                        next();
+                        int savePos = position();
+
+                        if (accept('.')) {
                             lexError(savePos, Errors.IllegalDot);
-                        }
+                        } else if (digit(pos, 10) >= 0) {
+                            put('.');
+                            scanFractionAndSuffix(pos); // (Spec. 3.10)
                     } else {
                         tk = TokenKind.DOT;
                     }
+                    }
+                    break loop;
+
+                case ',': // (Spec. 3.12)
+                    next();
+                    tk = TokenKind.COMMA;
+                    break loop;
+
+                case ';': // (Spec. 3.12)
+                    next();
+                    tk = TokenKind.SEMI;
                     break loop;
-                case ',':
-                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
-                case ';':
-                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
-                case '(':
-                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
-                case ')':
-                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
-                case '[':
-                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
-                case ']':
-                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
-                case '{':
-                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
-                case '}':
-                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
+
+                case '(': // (Spec. 3.12)
+                    next();
+                    tk = TokenKind.LPAREN;
+                    break loop;
+
+                case ')': // (Spec. 3.12)
+                    next();
+                    tk = TokenKind.RPAREN;
+                    break loop;
+
+                case '[': // (Spec. 3.12)
+                    next();
+                    tk = TokenKind.LBRACKET;
+                    break loop;
+
+                case ']': // (Spec. 3.12)
+                    next();
+                    tk = TokenKind.RBRACKET;
+                    break loop;
+
+                case '{': // (Spec. 3.12)
+                    next();
+                    tk = TokenKind.LBRACE;
+                    break loop;
+
+                case '}': // (Spec. 3.12)
+                    next();
+                    tk = TokenKind.RBRACE;
+                    break loop;
+
                 case '/':
-                    reader.scanChar();
-                    if (reader.ch == '/') {
-                        do {
-                            reader.scanCommentChar();
-                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
-                        if (reader.bp < reader.buflen) {
-                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
+                    next();
+
+                    if (accept('/')) { // (Spec. 3.7)
+                        skipToEOLN();
+
+                        if (!isEOF()) {
+                            comments = appendComment(comments, processComment(pos, position(), CommentStyle.LINE));
                         }
                         break;
-                    } else if (reader.ch == '*') {
+                    } else if (accept('*')) { // (Spec. 3.7)
                         boolean isEmpty = false;
-                        reader.scanChar();
                         CommentStyle style;
-                        if (reader.ch == '*') {
+
+                        if (accept('*')) {
                             style = CommentStyle.JAVADOC;
-                            reader.scanCommentChar();
-                            if (reader.ch == '/') {
+
+                            if (is('/')) {
                                 isEmpty = true;
                             }
                         } else {
                             style = CommentStyle.BLOCK;
                         }
-                        while (!isEmpty && reader.bp < reader.buflen) {
-                            if (reader.ch == '*') {
-                                reader.scanChar();
-                                if (reader.ch == '/') break;
+
+                        if (!isEmpty) {
+                            while (!isEOF()) {
+                                if (accept('*')) {
+                                    if (is('/')) {
+                                        break;
+                                    }
                             } else {
-                                reader.scanCommentChar();
+                                    next();
+                                }
                             }
                         }
-                        if (reader.ch == '/') {
-                            reader.scanChar();
-                            comments = addComment(comments, processComment(pos, reader.bp, style));
+
+                        if (accept('/')) {
+                            comments = appendComment(comments, processComment(pos, position(), style));
+
                             break;
                         } else {
                             lexError(pos, Errors.UnclosedComment);
+
                             break loop;
                         }
-                    } else if (reader.ch == '=') {
-                        tk = TokenKind.SLASHEQ;
-                        reader.scanChar();
+                    } else if (accept('=')) {
+                        tk = TokenKind.SLASHEQ; // (Spec. 3.12)
                     } else {
-                        tk = TokenKind.SLASH;
+                        tk = TokenKind.SLASH; // (Spec. 3.12)
                     }
                     break loop;
-                case '\'':
-                    reader.scanChar();
-                    if (reader.ch == '\'') {
+
+                case '\'': // (Spec. 3.10)
+                    next();
+
+                    if (accept('\'')) {
                         lexError(pos, Errors.EmptyCharLit);
-                        reader.scanChar();
                     } else {
-                        if (isEOLN())
+                        if (isEOLN()) {
                             lexError(pos, Errors.IllegalLineEndInCharLit);
-                        scanLitChar(pos, true, false);
-                        if (reader.ch == '\'') {
-                            reader.scanChar();
+                        }
+
+                        scanLitChar(pos);
+
+                        if (accept('\'')) {
                             tk = TokenKind.CHARLITERAL;
                         } else {
                             lexError(pos, Errors.UnclosedCharLit);
                         }
                     }
                     break loop;
-                case '\"':
+
+                case '\"': // (Spec. 3.10)
                     scanString(pos);
                     break loop;
+
                 default:
-                    if (isSpecial(reader.ch)) {
+                    if (isSpecial(get())) {
                         scanOperator();
                     } else {
                         boolean isJavaIdentifierStart;
-                        int codePoint = -1;
-                        if (reader.ch < '\u0080') {
+
+                        if (isASCII()) {
                             // all ASCII range chars already handled, above
                             isJavaIdentifierStart = false;
                         } else {
-                            codePoint = reader.peekSurrogates();
-                            if (codePoint >= 0) {
-                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
-                                    reader.putChar(true);
-                                }
-                            } else {
-                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
-                            }
+                            isJavaIdentifierStart = isSurrogate()
+                                    ? Character.isJavaIdentifierStart(getCodepoint())
+                                    : Character.isJavaIdentifierStart(get());
                         }
+
                         if (isJavaIdentifierStart) {
                             scanIdent();
-                        } else if (reader.digit(pos, 10) >= 0) {
+                        } else if (digit(pos, 10) >= 0) {
                             scanNumber(pos, 10);
-                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
+                        } else if (is((char)EOI) || isEOF()) {
                             tk = TokenKind.EOF;
-                            pos = reader.realLength;
+                            pos = position();
                         } else {
                             String arg;
 
-                            if (codePoint >= 0) {
-                                char high = reader.ch;
-                                reader.scanChar();
-                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
-                            } else {
-                                arg = (32 < reader.ch && reader.ch < 127) ?
-                                                String.format("%s", reader.ch) :
-                                                String.format("\\u%04x", (int)reader.ch);
+                            if (isSurrogate()) {
+                                int codePoint = getCodepoint();
+                                char hi = Character.highSurrogate(codePoint);
+                                char lo = Character.lowSurrogate(codePoint);
+                                arg = String.format("\\u%04x\\u%04x", (int) hi, (int) lo);
+                            } else {
+                                char ch = get();
+                                arg = (32 < ch && ch < 127) ? String.format("%s", ch) :
+                                                              String.format("\\u%04x", (int) ch);
                             }
+
                             lexError(pos, Errors.IllegalChar(arg));
-                            reader.scanChar();
+                            next();
                         }
                     }
                     break loop;
                 }
             }
-            endPos = reader.bp;
-            switch (tk.tag) {
-                case DEFAULT: return new Token(tk, pos, endPos, comments);
-                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
-                case STRING: {
+
+            int endPos = position();
+
+            if (tk.tag == Token.Tag.DEFAULT) {
+                return new Token(tk, pos, endPos, comments);
+            } else  if (tk.tag == Token.Tag.NAMED) {
+                return new NamedToken(tk, pos, endPos, name, comments);
+            } else {
                     // Get characters from string buffer.
-                    String string = reader.chars();
+                String string = sb.toString();
+
                     // If a text block.
-                    if (isTextBlock && TextBlockSupport.hasSupport()) {
+                if (isTextBlock) {
                         // Verify that the incidental indentation is consistent.
                         if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
                             Set<TextBlockSupport.WhitespaceChecks> checks =
                                     TextBlockSupport.checkWhitespace(string);
                             if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {

@@ -1002,218 +1054,282 @@
                                         Warnings.TrailingWhiteSpaceWillBeRemoved);
                             }
                         }
                         // Remove incidental indentation.
                         try {
-                            string = TextBlockSupport.stripIndent(string);
+                        string = string.stripIndent();
                         } catch (Exception ex) {
                             // Error already reported, just use unstripped string.
                         }
                     }
+
                     // Translate escape sequences if present.
-                    if (hasEscapeSequences && TextBlockSupport.hasSupport()) {
+                if (hasEscapeSequences) {
                         try {
-                            string = TextBlockSupport.translateEscapes(string);
+                        string = string.translateEscapes();
                         } catch (Exception ex) {
                             // Error already reported, just use untranslated string.
                         }
                     }
+
+                if (tk.tag == Token.Tag.STRING) {
                     // Build string token.
                     return new StringToken(tk, pos, endPos, string, comments);
-                }
-                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
-                default: throw new AssertionError();
+                } else {
+                    // Build numeric token.
+                    return new NumericToken(tk, pos, endPos, string, radix, comments);
             }
         }
-        finally {
+        } finally {
+            int endPos = position();
+
             if (scannerDebug) {
                     System.out.println("nextToken(" + pos
                                        + "," + endPos + ")=|" +
-                                       new String(reader.getRawCharacters(pos, endPos))
+                                       new String(getRawCharacters(pos, endPos))
                                        + "|");
             }
         }
     }
-    //where
-        List<Comment> addComment(List<Comment> comments, Comment comment) {
+
+    /**
+     * Prepends a comment to the list of comments preceding the current token.
+     *
+     * @param comments  existing list of comments.
+     * @param comment   comment to append.
+     *
+     * @return new list with comment prepended to the existing list.
+     */
+    List<Comment> appendComment(List<Comment> comments, Comment comment) {
             return comments == null ?
                     List.of(comment) :
                     comments.prepend(comment);
         }
 
-    /** Return the position where a lexical error occurred;
+    /**
+     * Return the position where a lexical error occurred.
+     *
+     * @return position in the input buffer of where the error occurred.
      */
     public int errPos() {
         return errPos;
     }
 
-    /** Set the position where a lexical error occurred;
+    /**
+     * Set the position where a lexical error occurred.
+     *
+     * @param pos  position in the input buffer of where the error occurred.
      */
     public void errPos(int pos) {
         errPos = pos;
     }
 
     /**
      * Called when a complete comment has been scanned. pos and endPos
      * will mark the comment boundary.
+     *
+     * @param pos     position of the opening / in the input buffer.
+     * @param endPos  position + 1 of the closing / in the input buffer.
+     * @param style   style of comment.
+     *
+     * @return the constructed BasicComment.
      */
     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
-        if (scannerDebug)
+        if (scannerDebug) {
             System.out.println("processComment(" + pos
                                + "," + endPos + "," + style + ")=|"
-                               + new String(reader.getRawCharacters(pos, endPos))
+                                + new String(getRawCharacters(pos, endPos))
                                + "|");
-        char[] buf = reader.getRawCharacters(pos, endPos);
-        return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
+        }
+
+        char[] buf = getRawCharacters(pos, endPos);
+
+        return new BasicComment(style, fac, buf, pos);
     }
 
     /**
      * Called when a complete whitespace run has been scanned. pos and endPos
      * will mark the whitespace boundary.
+     *
+     * (Spec 3.6)
+     *
+     * @param pos     position in input buffer of first whitespace character.
+     * @param endPos  position + 1 in input buffer of last whitespace character.
      */
     protected void processWhiteSpace(int pos, int endPos) {
-        if (scannerDebug)
+        if (scannerDebug) {
             System.out.println("processWhitespace(" + pos
                                + "," + endPos + ")=|" +
-                               new String(reader.getRawCharacters(pos, endPos))
+                                new String(getRawCharacters(pos, endPos))
                                + "|");
     }
+    }
 
     /**
      * Called when a line terminator has been processed.
+     *
+     * @param pos     position in input buffer of first character in sequence.
+     * @param endPos  position + 1 in input buffer of last character in sequence.
      */
     protected void processLineTerminator(int pos, int endPos) {
-        if (scannerDebug)
+        if (scannerDebug) {
             System.out.println("processTerminator(" + pos
                                + "," + endPos + ")=|" +
-                               new String(reader.getRawCharacters(pos, endPos))
+                                new String(getRawCharacters(pos, endPos))
                                + "|");
     }
+    }
 
-    /** Build a map for translating between line numbers and
-     * positions in the input.
+    /**
+     * Build a map for translating between line numbers and positions in the input.
      *
-     * @return a LineMap */
+     * @return a LineMap
+     */
     public Position.LineMap getLineMap() {
-        return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
+        return Position.makeLineMap(getRawCharacters(), length(), false);
     }
 
-
     /**
     * Scan a documentation comment; determine if a deprecated tag is present.
     * Called once the initial /, * have been skipped, positioned at the second *
     * (which is treated as the beginning of the first line).
     * Stops positioned at the closing '/'.
     */
-    protected static class BasicComment<U extends UnicodeReader> implements Comment {
-
+    protected static class BasicComment extends PositionTrackingReader implements Comment {
+        /**
+         * Style of comment
+         *   LINE starting with //
+         *   BLOCK starting with /*
+         *   JAVADOC starting with /**
+         */
         CommentStyle cs;
-        U comment_reader;
 
+        /**
+         * true if comment contains @deprecated at beginning of a line.
+         */
         protected boolean deprecatedFlag = false;
+
+        /**
+         * true if comment has been fully scanned.
+         */
         protected boolean scanned = false;
 
-        protected BasicComment(U comment_reader, CommentStyle cs) {
-            this.comment_reader = comment_reader;
+        /**
+         * Constructor.
+         *
+         * @param cs      comment style
+         * @param sf      Scanner factory.
+         * @param array   Array containing contents of source.
+         * @param offset  Position offset in original source buffer.
+         */
+        protected BasicComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
+            super(sf, array, offset);
             this.cs = cs;
         }
 
+        /**
+         * Return comment body text minus comment adornments or null if not scanned.
+         *
+         * @return comment body text.
+         */
         public String getText() {
             return null;
         }
 
+        /**
+         * Return buffer position in original buffer mapped from buffer position in comment.
+         *
+         * @param pos  buffer position in comment.
+         *
+         * @return buffer position in original buffer.
+         */
         public int getSourcePos(int pos) {
             return -1;
         }
 
+        /**
+         * Return style of comment.
+         *   LINE starting with //
+         *   BLOCK starting with /*
+         *   JAVADOC starting with /**
+         *
+         * @return style of this comment.
+         */
         public CommentStyle getStyle() {
             return cs;
         }
 
+        /**
+         * true if comment contains @deprecated at beginning of a line.
+         *
+         * @return true if comment contains @deprecated.
+         */
         public boolean isDeprecated() {
             if (!scanned && cs == CommentStyle.JAVADOC) {
                 scanDocComment();
             }
+
             return deprecatedFlag;
         }
 
-        @SuppressWarnings("fallthrough")
+        /**
+         * Scan JAVADOC comment for details.
+         */
         protected void scanDocComment() {
             try {
                 boolean deprecatedPrefix = false;
-
-                comment_reader.bp += 3; // '/**'
-                comment_reader.ch = comment_reader.buf[comment_reader.bp];
+                accept("/**");
 
                 forEachLine:
-                while (comment_reader.bp < comment_reader.buflen) {
-
+                while (!isEOF()) {
                     // Skip optional WhiteSpace at beginning of line
-                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
-                        comment_reader.scanCommentChar();
-                    }
+                    skipWhitespace();
 
                     // Skip optional consecutive Stars
-                    while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
-                        comment_reader.scanCommentChar();
-                        if (comment_reader.ch == '/') {
+                    while (accept('*')) {
+                        if (is('/')) {
                             return;
                         }
                     }
 
                     // Skip optional WhiteSpace after Stars
-                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
-                        comment_reader.scanCommentChar();
-                    }
+                    skipWhitespace();
 
-                    deprecatedPrefix = false;
                     // At beginning of line in the JavaDoc sense.
-                    if (!deprecatedFlag) {
-                        String deprecated = "@deprecated";
-                        int i = 0;
-                        while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
-                            comment_reader.scanCommentChar();
-                            i++;
-                            if (i == deprecated.length()) {
-                                deprecatedPrefix = true;
-                                break;
-                            }
-                        }
-                    }
+                    deprecatedPrefix = deprecatedFlag || accept("@deprecated");
 
-                    if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
-                        if (Character.isWhitespace(comment_reader.ch)) {
+                    if (deprecatedPrefix && !isEOF()) {
+                        if (Character.isWhitespace(get())) {
                             deprecatedFlag = true;
-                        } else if (comment_reader.ch == '*') {
-                            comment_reader.scanCommentChar();
-                            if (comment_reader.ch == '/') {
+                        } else if (accept('*')) {
+                            if (is('/')) {
                                 deprecatedFlag = true;
                                 return;
                             }
                         }
                     }
 
                     // Skip rest of line
-                    while (comment_reader.bp < comment_reader.buflen) {
-                        switch (comment_reader.ch) {
+                    while (!isEOF()) {
+                        switch (get()) {
                             case '*':
-                                comment_reader.scanCommentChar();
-                                if (comment_reader.ch == '/') {
+                                next();
+
+                                if (is('/')) {
                                     return;
                                 }
+
                                 break;
-                            case CR: // (Spec 3.4)
-                                comment_reader.scanCommentChar();
-                                if (comment_reader.ch != LF) {
-                                    continue forEachLine;
-                                }
-                            /* fall through to LF case */
-                            case LF: // (Spec 3.4)
-                                comment_reader.scanCommentChar();
+                            case '\r': // (Spec 3.4)
+                            case '\n': // (Spec 3.4)
+                                accept('\r');
+                                accept('\n');
                                 continue forEachLine;
+
                             default:
-                                comment_reader.scanCommentChar();
+                                next();
+                                break;
                         }
                     } // rest of line
                 } // forEachLine
                 return;
             } finally {

< prev index next >