--- old/make/jdk/src/classes/build/tools/dtdbuilder/DTDParser.java 2020-03-23 19:57:02.563962510 +0100 +++ /dev/null 2020-02-11 10:29:13.086348146 +0100 @@ -1,927 +0,0 @@ -/* - * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -package build.tools.dtdbuilder; - -import javax.swing.text.html.parser.*; -import java.net.URL; -import java.io.IOException; -import java.io.InputStream; -import java.util.Enumeration; -import java.util.Vector; -import java.util.Hashtable; -import java.util.BitSet; -import java.text.MessageFormat; - -/** - * A parser for DTDs. This parser roughly corresponds to the - * rules specified in "The SGML Handbook" by Charles F. Goldfarb. - * The end result of parsing the stream is a DTD object. 
- * - * - * @see DTD - * @see DTDInputStream - * @author Arthur van Hoff - */ -final -class DTDParser implements DTDConstants { - DTDBuilder dtd; - DTDInputStream in; - int ch; - char str[] = new char[128]; - int strpos = 0; - int nerrors = 0; - - /** - * Report an error. - */ - void error(String err, String arg1, String arg2, String arg3) { - nerrors++; - - String msgParams[] = {arg1, arg2, arg3}; - - String str = getSubstProp("dtderr." + err, msgParams); - if (str == null) { - str = err + "[" + arg1 + "," + arg2 + "," + arg3 + "]"; - } - System.err.println("line " + in.ln + ", dtd " + dtd + ": " + str); - } - void error(String err, String arg1, String arg2) { - error(err, arg1, arg2, "?"); - } - void error(String err, String arg1) { - error(err, arg1, "?", "?"); - } - void error(String err) { - error(err, "?", "?", "?"); - } - - private String getSubstProp(String propName, String args[]) { - String prop = System.getProperty(propName); - - if (prop == null) { - return null; - } - - return MessageFormat.format(prop, (Object[])args); - } - - /** - * Expect a character. - */ - boolean expect(int c) throws IOException { - if (ch != c) { - char str[] = {(char)c}; - error("expected", "'" + new String(str) + "'"); - return false; - } - ch = in.read(); - return true; - } - - /** - * Add a char to the string buffer. - */ - void addString(int c) { - if (strpos == str.length) { - char newstr[] = new char[str.length * 2]; - System.arraycopy(str, 0, newstr, 0, str.length); - str = newstr; - } - str[strpos++] = (char)c; - } - - /** - * Get the string which was accumulated in the buffer. - * Pos is the starting position of the string. - */ - String getString(int pos) { - char newstr[] = new char[strpos - pos]; - System.arraycopy(str, pos, newstr, 0, strpos - pos); - strpos = pos; - return new String(newstr); - } - - /** - * Get the chars which were accumulated in the buffer. - * Pos is the starting position of the string. 
- */ - char[] getChars(int pos) { - char newstr[] = new char[strpos - pos]; - System.arraycopy(str, pos, newstr, 0, strpos - pos); - strpos = pos; - return newstr; - } - - /** - * Skip spaces. [5] 297:23 - */ - void skipSpace() throws IOException { - while (true) { - switch (ch) { - case '\n': - case ' ': - case '\t': - ch = in.read(); - break; - - default: - return; - } - } - } - - /** - * Skip tag spaces (includes comments). [65] 372:1 - */ - void skipParameterSpace() throws IOException { - while (true) { - switch (ch) { - case '\n': - case ' ': - case '\t': - ch = in.read(); - break; - case '-': - if ((ch = in.read()) != '-') { - in.push(ch); - ch = '-'; - return; - } - - in.replace++; - while (true) { - switch (ch = in.read()) { - case '-': - if ((ch = in.read()) == '-') { - ch = in.read(); - in.replace--; - skipParameterSpace(); - return; - } - break; - - case -1: - error("eof.arg", "comment"); - in.replace--; - return; - } - } - default: - return; - } - } - } - - /** - * Parse identifier. Uppercase characters are automatically - * folded to lowercase. Returns falsed if no identifier is found. - */ - @SuppressWarnings("fallthrough") - boolean parseIdentifier(boolean lower) throws IOException { - switch (ch) { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - if (lower) { - ch = 'a' + (ch - 'A'); - } - /* fall through */ - - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - break; - - default: - return false; - } - - addString(ch); - ch = in.read(); - parseNameToken(lower); - return true; - } - - /** - * Parses name token. 
If lower is true, upper case letters - * are folded to lower case. Returns falsed if no token is found. - */ - @SuppressWarnings("fallthrough") - boolean parseNameToken(boolean lower) throws IOException { - boolean first = true; - - while (true) { - switch (ch) { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - if (lower) { - ch = 'a' + (ch - 'A'); - } - /* fall through */ - - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - - case '.': case '-': - addString(ch); - ch = in.read(); - first = false; - break; - - default: - return !first; - } - } - } - - /** - * Parse a list of identifiers. - */ - Vector parseIdentifierList(boolean lower) throws IOException { - Vector elems = new Vector<>(); - skipSpace(); - switch (ch) { - case '(': - ch = in.read(); - skipParameterSpace(); - while (parseNameToken(lower)) { - elems.addElement(getString(0)); - skipParameterSpace(); - if (ch == '|') { - ch = in.read(); - skipParameterSpace(); - } - } - expect(')'); - skipParameterSpace(); - break; - - default: - if (!parseIdentifier(lower)) { - error("expected", "identifier"); - break; - } - elems.addElement(getString(0)); - skipParameterSpace(); - break; - } - return elems; - } - - /** - * Parse and Entity reference. Should be called when - * a & is encountered. The data is put in the string buffer. 
- * [59] 350:17 - */ - private void parseEntityReference() throws IOException { - int pos = strpos; - - if ((ch = in.read()) == '#') { - int n = 0; - ch = in.read(); - if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z'))) { - addString('#'); - } else { - while ((ch >= '0') && (ch <= '9')) { - n = (n * 10) + ch - '0'; - ch = in.read(); - } - if ((ch == ';') || (ch == '\n')) { - ch = in.read(); - } - addString(n); - return; - } - } - - while (true) { - switch (ch) { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - - case '.': case '-': - addString(ch); - ch = in.read(); - break; - - default: - if (strpos == pos) { - addString('&'); - return; - } - String nm = getString(pos); - Entity ent = dtd.getEntity(nm); - if (ent == null) { - error("undef.entref" + nm); - return; - } - if ((ch == ';') || (ch == '\n')) { - ch = in.read(); - } - char data[] = ent.getData(); - for (int i = 0 ; i < data.length ; i++) { - addString(data[i]); - } - return; - } - } - } - - /** - * Parse an entity declaration. 
- * [101] 394:18 - * REMIND: external entity type - */ - private void parseEntityDeclaration() throws IOException { - int type = GENERAL; - - skipSpace(); - if (ch == '%') { - ch = in.read(); - type = PARAMETER; - skipSpace(); - } - if (ch == '#') { - addString('#'); - ch = in.read(); - } - if (!parseIdentifier(false)) { - error("expected", "identifier"); - return; - } - String nm = getString(0); - skipParameterSpace(); - if (parseIdentifier(false)) { - String tnm = getString(0); - int t = Entity.name2type(tnm); - if (t == 0) { - error("invalid.arg", "entity type", tnm); - } else { - type |= t; - } - skipParameterSpace(); - } - - if ((ch != '"') && (ch != '\'')) { - error("expected", "entity value"); - skipParameterSpace(); - if (ch == '>') { - ch = in.read(); - } - return; - } - - int term = ch; - ch = in.read(); - while ((ch != -1) && (ch != term)) { - if (ch == '&') { - parseEntityReference(); - } else { - addString(ch & 0xFF); - ch = in.read(); - } - } - if (ch == term) { - ch = in.read(); - } - if (in.replace == 0) { - char data[] = getChars(0); - dtd.defineEntity(nm, type, data); - } else { - strpos = 0; - } - skipParameterSpace(); - expect('>'); - } - - /** - * Parse content model. 
- * [126] 410:1 - * REMIND: data tag group - */ - ContentModel parseContentModel() throws IOException { - ContentModel m = null; - - switch (ch) { - case '(': - ch = in.read(); - skipParameterSpace(); - ContentModel e = parseContentModel(); - - if (ch != ')') { - m = new ContentModel(ch, e); - do { - ch = in.read(); - skipParameterSpace(); - e.next = parseContentModel(); - if (e.next.type == m.type) { - e.next = (ContentModel)e.next.content; - } - for (; e.next != null ; e = e.next); - } while (ch == m.type); - } else { - m = new ContentModel(',', e); - } - expect(')'); - break; - - case '#': - ch = in.read(); - if (parseIdentifier(true)) { - m = new ContentModel('*', new ContentModel(dtd.getElement("#" + getString(0)))); - } else { - error("invalid", "content model"); - } - break; - - default: - if (parseIdentifier(true)) { - m = new ContentModel(dtd.getElement(getString(0))); - } else { - error("invalid", "content model"); - } - break; - } - - switch (ch) { - case '?': - case '*': - case '+': - m = new ContentModel(ch, m); - ch = in.read(); - break; - } - skipParameterSpace(); - - return m; - } - - /** - * Parse element declaration. 
- * [116] 405:6 - */ - void parseElementDeclaration() throws IOException { - Vector elems = parseIdentifierList(true); - BitSet inclusions = null; - BitSet exclusions = null; - boolean omitStart = false; - boolean omitEnd = false; - - if ((ch == '-') || (ch == 'O')) { - omitStart = ch == 'O'; - ch = in.read(); - skipParameterSpace(); - - if ((ch == '-') || (ch == 'O')) { - omitEnd = ch == 'O'; - ch = in.read(); - skipParameterSpace(); - } else { - expect('-'); - } - } - - int type = MODEL; - ContentModel content = null; - if (parseIdentifier(false)) { - String nm = getString(0); - type = Element.name2type(nm); - if (type == 0) { - error("invalid.arg", "content type", nm); - type = EMPTY; - } - skipParameterSpace(); - } else { - content = parseContentModel(); - } - - if ((type == MODEL) || (type == ANY)) { - if (ch == '-') { - ch = in.read(); - Vector v = parseIdentifierList(true); - exclusions = new BitSet(); - for (Enumeration e = v.elements() ; e.hasMoreElements() ;) { - exclusions.set(dtd.getElement(e.nextElement()).getIndex()); - } - } - if (ch == '+') { - ch = in.read(); - Vector v = parseIdentifierList(true); - inclusions = new BitSet(); - for (Enumeration e = v.elements() ; e.hasMoreElements() ;) { - inclusions.set(dtd.getElement(e.nextElement()).getIndex()); - } - } - } - expect('>'); - - if (in.replace == 0) { - for (Enumeration e = elems.elements() ; e.hasMoreElements() ;) { - dtd.defineElement(e.nextElement(), type, omitStart, omitEnd, content, exclusions, inclusions, null); - } - } - } - - /** - * Parse an attribute declared value. 
- * [145] 422:6 - */ - void parseAttributeDeclaredValue(AttributeList atts) throws IOException { - if (ch == '(') { - atts.values = parseIdentifierList(true); - atts.type = NMTOKEN; - return; - } - if (!parseIdentifier(false)) { - error("invalid", "attribute value"); - return; - } - atts.type = AttributeList.name2type(getString(0)); - skipParameterSpace(); - if (atts.type == NOTATION) { - atts.values = parseIdentifierList(true); - } - } - - /** - * Parse an attribute value specification. - * [33] 331:1 - */ - @SuppressWarnings("fallthrough") - String parseAttributeValueSpecification() throws IOException { - int delim = -1; - switch (ch) { - case '\'': - case '"': - delim = ch; - ch = in.read(); - } - while (true) { - switch (ch) { - case -1: - error("eof.arg", "attribute value"); - return getString(0); - - case '&': - parseEntityReference(); - break; - - case ' ': - case '\t': - case '\n': - if (delim == -1) { - return getString(0); - } - addString(' '); - ch = in.read(); - break; - - case '\'': - case '"': - if (delim == ch) { - ch = in.read(); - return getString(0); - } - /* fall through */ - - default: - addString(ch & 0xFF); - ch = in.read(); - break; - } - } - } - - /** - * Parse an attribute default value. - * [147] 425:1 - */ - void parseAttributeDefaultValue(AttributeList atts) throws IOException { - if (ch == '#') { - ch = in.read(); - if (!parseIdentifier(true)) { - error("invalid", "attribute value"); - return; - } - skipParameterSpace(); - atts.modifier = AttributeList.name2type(getString(0)); - if (atts.modifier != FIXED) { - return; - } - } - atts.value = parseAttributeValueSpecification(); - skipParameterSpace(); - } - - /** - * Parse an attribute definition list declaration. 
- * [141] 420:15 - * REMIND: associated notation name - */ - void parseAttlistDeclaration() throws IOException { - Vector elems = parseIdentifierList(true); - AttributeList attlist = null, atts = null; - - while (parseIdentifier(true)) { - if (atts == null) { - attlist = atts = new AttributeList(getString(0)); - } else { - atts.next = new AttributeList(getString(0)); - atts = atts.next; - } - skipParameterSpace(); - parseAttributeDeclaredValue(atts); - parseAttributeDefaultValue(atts); - - if ((atts.modifier == IMPLIED) && (atts.values != null) && (atts.values.size() == 1)) { - atts.value = (String)atts.values.elementAt(0); - } - } - - expect('>'); - - if (in.replace == 0) { - for (Enumeration e = elems.elements() ; e.hasMoreElements() ;) { - dtd.defineAttributes(e.nextElement(), attlist); - } - } - } - - /** - * Parse an ignored section until ]]> is encountered. - */ - void parseIgnoredSection() throws IOException { - int depth = 1; - in.replace++; - while (true) { - switch (ch) { - case '<': - if ((ch = in.read()) == '!') { - if ((ch = in.read()) == '[') { - ch = in.read(); - depth++; - } - } - break; - case ']': - if ((ch = in.read()) == ']') { - if ((ch = in.read()) == '>') { - ch = in.read(); - if (--depth == 0) { - in.replace--; - return; - } - } - } - break; - case -1: - error("eof"); - in.replace--; - return; - - default: - ch = in.read(); - break; - } - } - } - - /** - * Parse a marked section declaration. 
- * [93] 391:13 - * REMIND: deal with all status keywords - */ - void parseMarkedSectionDeclaration() throws IOException { - ch = in.read(); - skipSpace(); - if (!parseIdentifier(true)) { - error("expected", "section status keyword"); - return; - } - String str = getString(0); - skipSpace(); - expect('['); - if ("ignore".equals(str)) { - parseIgnoredSection(); - } else { - if (!"include".equals(str)) { - error("invalid.arg", "section status keyword", str); - } - parseSection(); - expect(']'); - expect(']'); - expect('>'); - } - } - - /** - * Parse an external identifier - * [73] 379:1 - */ - void parseExternalIdentifier() throws IOException { - if (parseIdentifier(false)) { - String id = getString(0); - skipParameterSpace(); - - if (id.equals("PUBLIC")) { - if ((ch == '\'') || (ch == '"')) { - parseAttributeValueSpecification(); - } else { - error("expected", "public identifier"); - } - skipParameterSpace(); - } else if (!id.equals("SYSTEM")) { - error("invalid", "external identifier"); - } - if ((ch == '\'') || (ch == '"')) { - parseAttributeValueSpecification(); - } - skipParameterSpace(); - } - } - - /** - * Parse document type declaration. - * [110] 403:1 - */ - void parseDocumentTypeDeclaration() throws IOException { - skipParameterSpace(); - if (!parseIdentifier(true)) { - error("expected", "identifier"); - } else { - skipParameterSpace(); - } - strpos = 0; - parseExternalIdentifier(); - - if (ch == '[') { - ch = in.read(); - parseSection(); - expect(']'); - skipParameterSpace(); - } - expect('>'); - } - - /** - * Parse a section of the input upto EOF or ']'. 
- */ - @SuppressWarnings("fallthrough") - void parseSection() throws IOException { - while (true) { - switch (ch) { - case ']': - return; - - case '<': - switch (ch = in.read()) { - case '!': - switch (ch = in.read()) { - case '[': - parseMarkedSectionDeclaration(); - break; - - case '-': - skipParameterSpace(); - expect('>'); - break; - - default: - if (parseIdentifier(true)) { - String str = getString(0); - - if (str.equals("element")) { - parseElementDeclaration(); - - } else if (str.equals("entity")) { - parseEntityDeclaration(); - - } else if (str.equals("attlist")) { - parseAttlistDeclaration(); - - } else if (str.equals("doctype")) { - parseDocumentTypeDeclaration(); - - } else if (str.equals("usemap")) { - error("ignoring", "usemap"); - while ((ch != -1) && (ch != '>')) { - ch = in.read(); - } - expect('>'); - } else if (str.equals("shortref")) { - error("ignoring", "shortref"); - while ((ch != -1) && (ch != '>')) { - ch = in.read(); - } - expect('>'); - } else if (str.equals("notation")) { - error("ignoring", "notation"); - while ((ch != -1) && (ch != '>')) { - ch = in.read(); - } - expect('>'); - } else { - error("markup"); - } - } else { - error("markup"); - while ((ch != -1) && (ch != '>')) { - ch = in.read(); - } - expect('>'); - } - } - } - break; - - case -1: - return; - - default: - char str[] = {(char)ch}; - error("invalid.arg", "character", "'" + new String(str) + "' / " + ch); - /* fall through */ - - case ' ': - case '\t': - case '\n': - ch = in.read(); - break; - } - } - } - - /** - * Parse a DTD. - * @return the dtd or null if an error occurred. 
- */ - DTD parse(InputStream in, DTDBuilder dtd) { - try { - this.dtd = dtd; - this.in = new DTDInputStream(in, dtd); - - ch = this.in.read(); - parseSection(); - - if (ch != -1) { - error("premature"); - } - } catch (IOException e) { - error("ioexception"); - } catch (Exception e) { - error("exception", e.getClass().getName(), e.getMessage()); - e.printStackTrace(); - } catch (ThreadDeath e) { - error("terminated"); - } - return (nerrors > 0) ? null : dtd; - } -} --- /dev/null 2020-02-11 10:29:13.086348146 +0100 +++ new/src/java.desktop/share/tools/org/openjdk/buildtools/dtdbuilder/DTDParser.java 2020-03-23 19:57:02.195962513 +0100 @@ -0,0 +1,927 @@ +/* + * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package org.openjdk.buildtools.dtdbuilder; + +import javax.swing.text.html.parser.*; +import java.net.URL; +import java.io.IOException; +import java.io.InputStream; +import java.util.Enumeration; +import java.util.Vector; +import java.util.Hashtable; +import java.util.BitSet; +import java.text.MessageFormat; + +/** + * A parser for DTDs. This parser roughly corresponds to the + * rules specified in "The SGML Handbook" by Charles F. Goldfarb. + * The end result of parsing the stream is a DTD object. + * + * + * @see DTD + * @see DTDInputStream + * @author Arthur van Hoff + */ +final +class DTDParser implements DTDConstants { + DTDBuilder dtd; + DTDInputStream in; + int ch; + char str[] = new char[128]; + int strpos = 0; + int nerrors = 0; + + /** + * Report an error. + */ + void error(String err, String arg1, String arg2, String arg3) { + nerrors++; + + String msgParams[] = {arg1, arg2, arg3}; + + String str = getSubstProp("dtderr." + err, msgParams); + if (str == null) { + str = err + "[" + arg1 + "," + arg2 + "," + arg3 + "]"; + } + System.err.println("line " + in.ln + ", dtd " + dtd + ": " + str); + } + void error(String err, String arg1, String arg2) { + error(err, arg1, arg2, "?"); + } + void error(String err, String arg1) { + error(err, arg1, "?", "?"); + } + void error(String err) { + error(err, "?", "?", "?"); + } + + private String getSubstProp(String propName, String args[]) { + String prop = System.getProperty(propName); + + if (prop == null) { + return null; + } + + return MessageFormat.format(prop, (Object[])args); + } + + /** + * Expect a character. + */ + boolean expect(int c) throws IOException { + if (ch != c) { + char str[] = {(char)c}; + error("expected", "'" + new String(str) + "'"); + return false; + } + ch = in.read(); + return true; + } + + /** + * Add a char to the string buffer. 
+ */ + void addString(int c) { + if (strpos == str.length) { + char newstr[] = new char[str.length * 2]; + System.arraycopy(str, 0, newstr, 0, str.length); + str = newstr; + } + str[strpos++] = (char)c; + } + + /** + * Get the string which was accumulated in the buffer. + * Pos is the starting position of the string. + */ + String getString(int pos) { + char newstr[] = new char[strpos - pos]; + System.arraycopy(str, pos, newstr, 0, strpos - pos); + strpos = pos; + return new String(newstr); + } + + /** + * Get the chars which were accumulated in the buffer. + * Pos is the starting position of the string. + */ + char[] getChars(int pos) { + char newstr[] = new char[strpos - pos]; + System.arraycopy(str, pos, newstr, 0, strpos - pos); + strpos = pos; + return newstr; + } + + /** + * Skip spaces. [5] 297:23 + */ + void skipSpace() throws IOException { + while (true) { + switch (ch) { + case '\n': + case ' ': + case '\t': + ch = in.read(); + break; + + default: + return; + } + } + } + + /** + * Skip tag spaces (includes comments). [65] 372:1 + */ + void skipParameterSpace() throws IOException { + while (true) { + switch (ch) { + case '\n': + case ' ': + case '\t': + ch = in.read(); + break; + case '-': + if ((ch = in.read()) != '-') { + in.push(ch); + ch = '-'; + return; + } + + in.replace++; + while (true) { + switch (ch = in.read()) { + case '-': + if ((ch = in.read()) == '-') { + ch = in.read(); + in.replace--; + skipParameterSpace(); + return; + } + break; + + case -1: + error("eof.arg", "comment"); + in.replace--; + return; + } + } + default: + return; + } + } + } + + /** + * Parse identifier. Uppercase characters are automatically + * folded to lowercase. Returns falsed if no identifier is found. 
+ */ + @SuppressWarnings("fallthrough") + boolean parseIdentifier(boolean lower) throws IOException { + switch (ch) { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + if (lower) { + ch = 'a' + (ch - 'A'); + } + /* fall through */ + + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + break; + + default: + return false; + } + + addString(ch); + ch = in.read(); + parseNameToken(lower); + return true; + } + + /** + * Parses name token. If lower is true, upper case letters + * are folded to lower case. Returns falsed if no token is found. + */ + @SuppressWarnings("fallthrough") + boolean parseNameToken(boolean lower) throws IOException { + boolean first = true; + + while (true) { + switch (ch) { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + if (lower) { + ch = 'a' + (ch - 'A'); + } + /* fall through */ + + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + + case '.': case '-': + addString(ch); + ch = in.read(); + first = false; + break; + + default: + return !first; + } + } + } + + /** + * 
Parse a list of identifiers. + */ + Vector parseIdentifierList(boolean lower) throws IOException { + Vector elems = new Vector<>(); + skipSpace(); + switch (ch) { + case '(': + ch = in.read(); + skipParameterSpace(); + while (parseNameToken(lower)) { + elems.addElement(getString(0)); + skipParameterSpace(); + if (ch == '|') { + ch = in.read(); + skipParameterSpace(); + } + } + expect(')'); + skipParameterSpace(); + break; + + default: + if (!parseIdentifier(lower)) { + error("expected", "identifier"); + break; + } + elems.addElement(getString(0)); + skipParameterSpace(); + break; + } + return elems; + } + + /** + * Parse and Entity reference. Should be called when + * a & is encountered. The data is put in the string buffer. + * [59] 350:17 + */ + private void parseEntityReference() throws IOException { + int pos = strpos; + + if ((ch = in.read()) == '#') { + int n = 0; + ch = in.read(); + if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z'))) { + addString('#'); + } else { + while ((ch >= '0') && (ch <= '9')) { + n = (n * 10) + ch - '0'; + ch = in.read(); + } + if ((ch == ';') || (ch == '\n')) { + ch = in.read(); + } + addString(n); + return; + } + } + + while (true) { + switch (ch) { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + + case '.': case '-': + addString(ch); + ch = in.read(); + break; + + default: + if (strpos == pos) { + addString('&'); + return; + } 
+ String nm = getString(pos); + Entity ent = dtd.getEntity(nm); + if (ent == null) { + error("undef.entref" + nm); + return; + } + if ((ch == ';') || (ch == '\n')) { + ch = in.read(); + } + char data[] = ent.getData(); + for (int i = 0 ; i < data.length ; i++) { + addString(data[i]); + } + return; + } + } + } + + /** + * Parse an entity declaration. + * [101] 394:18 + * REMIND: external entity type + */ + private void parseEntityDeclaration() throws IOException { + int type = GENERAL; + + skipSpace(); + if (ch == '%') { + ch = in.read(); + type = PARAMETER; + skipSpace(); + } + if (ch == '#') { + addString('#'); + ch = in.read(); + } + if (!parseIdentifier(false)) { + error("expected", "identifier"); + return; + } + String nm = getString(0); + skipParameterSpace(); + if (parseIdentifier(false)) { + String tnm = getString(0); + int t = Entity.name2type(tnm); + if (t == 0) { + error("invalid.arg", "entity type", tnm); + } else { + type |= t; + } + skipParameterSpace(); + } + + if ((ch != '"') && (ch != '\'')) { + error("expected", "entity value"); + skipParameterSpace(); + if (ch == '>') { + ch = in.read(); + } + return; + } + + int term = ch; + ch = in.read(); + while ((ch != -1) && (ch != term)) { + if (ch == '&') { + parseEntityReference(); + } else { + addString(ch & 0xFF); + ch = in.read(); + } + } + if (ch == term) { + ch = in.read(); + } + if (in.replace == 0) { + char data[] = getChars(0); + dtd.defineEntity(nm, type, data); + } else { + strpos = 0; + } + skipParameterSpace(); + expect('>'); + } + + /** + * Parse content model. 
+ * [126] 410:1 + * REMIND: data tag group + */ + ContentModel parseContentModel() throws IOException { + ContentModel m = null; + + switch (ch) { + case '(': + ch = in.read(); + skipParameterSpace(); + ContentModel e = parseContentModel(); + + if (ch != ')') { + m = new ContentModel(ch, e); + do { + ch = in.read(); + skipParameterSpace(); + e.next = parseContentModel(); + if (e.next.type == m.type) { + e.next = (ContentModel)e.next.content; + } + for (; e.next != null ; e = e.next); + } while (ch == m.type); + } else { + m = new ContentModel(',', e); + } + expect(')'); + break; + + case '#': + ch = in.read(); + if (parseIdentifier(true)) { + m = new ContentModel('*', new ContentModel(dtd.getElement("#" + getString(0)))); + } else { + error("invalid", "content model"); + } + break; + + default: + if (parseIdentifier(true)) { + m = new ContentModel(dtd.getElement(getString(0))); + } else { + error("invalid", "content model"); + } + break; + } + + switch (ch) { + case '?': + case '*': + case '+': + m = new ContentModel(ch, m); + ch = in.read(); + break; + } + skipParameterSpace(); + + return m; + } + + /** + * Parse element declaration. 
+ * [116] 405:6 + */ + void parseElementDeclaration() throws IOException { + Vector elems = parseIdentifierList(true); + BitSet inclusions = null; + BitSet exclusions = null; + boolean omitStart = false; + boolean omitEnd = false; + + if ((ch == '-') || (ch == 'O')) { + omitStart = ch == 'O'; + ch = in.read(); + skipParameterSpace(); + + if ((ch == '-') || (ch == 'O')) { + omitEnd = ch == 'O'; + ch = in.read(); + skipParameterSpace(); + } else { + expect('-'); + } + } + + int type = MODEL; + ContentModel content = null; + if (parseIdentifier(false)) { + String nm = getString(0); + type = Element.name2type(nm); + if (type == 0) { + error("invalid.arg", "content type", nm); + type = EMPTY; + } + skipParameterSpace(); + } else { + content = parseContentModel(); + } + + if ((type == MODEL) || (type == ANY)) { + if (ch == '-') { + ch = in.read(); + Vector v = parseIdentifierList(true); + exclusions = new BitSet(); + for (Enumeration e = v.elements() ; e.hasMoreElements() ;) { + exclusions.set(dtd.getElement(e.nextElement()).getIndex()); + } + } + if (ch == '+') { + ch = in.read(); + Vector v = parseIdentifierList(true); + inclusions = new BitSet(); + for (Enumeration e = v.elements() ; e.hasMoreElements() ;) { + inclusions.set(dtd.getElement(e.nextElement()).getIndex()); + } + } + } + expect('>'); + + if (in.replace == 0) { + for (Enumeration e = elems.elements() ; e.hasMoreElements() ;) { + dtd.defineElement(e.nextElement(), type, omitStart, omitEnd, content, exclusions, inclusions, null); + } + } + } + + /** + * Parse an attribute declared value. 
+     * [145] 422:6
+     *
+     * <p>A value starting with {@code (} is read as an identifier list that
+     * becomes the allowed values, with type {@code NMTOKEN}. Otherwise a
+     * keyword is read and mapped through {@code AttributeList.name2type};
+     * for {@code NOTATION} the identifier list that follows is read as the
+     * allowed values.
+     *
+     * @param atts the attribute definition whose {@code type} and
+     *             {@code values} fields are filled in
+     * @throws IOException if reading the input fails
+     */
+    void parseAttributeDeclaredValue(AttributeList atts) throws IOException {
+        if (ch == '(') {
+            atts.values = parseIdentifierList(true);
+            atts.type = NMTOKEN;
+        } else if (parseIdentifier(false)) {
+            atts.type = AttributeList.name2type(getString(0));
+            skipParameterSpace();
+            if (atts.type == NOTATION) {
+                atts.values = parseIdentifierList(true);
+            }
+        } else {
+            error("invalid", "attribute value");
+        }
+    }
+
+    /**
+     * Parse an attribute value specification.
+     * [33] 331:1
+     *
+     * <p>The value may be delimited by {@code '} or {@code "}. An
+     * undelimited value ends at the first space, tab or newline; inside a
+     * delimited value each of those characters is stored as a space.
+     * {@code &} hands control to {@code parseEntityReference()}. Every other
+     * character is appended after masking it to its low 8 bits.
+     *
+     * @return the accumulated value; on end of input an error is reported
+     *         and whatever was accumulated so far is returned
+     * @throws IOException if reading the input fails
+     */
+    @SuppressWarnings("fallthrough")
+    String parseAttributeValueSpecification() throws IOException {
+        // -1 means "no delimiter"; otherwise the opening quote character.
+        int quote = -1;
+        if ((ch == '\'') || (ch == '"')) {
+            quote = ch;
+            ch = in.read();
+        }
+
+        for (;;) {
+            switch (ch) {
+                case -1:
+                    error("eof.arg", "attribute value");
+                    return getString(0);
+
+                case '&':
+                    parseEntityReference();
+                    break;
+
+                case ' ':
+                case '\t':
+                case '\n':
+                    if (quote == -1) {
+                        // Whitespace terminates an undelimited value.
+                        return getString(0);
+                    }
+                    addString(' ');
+                    ch = in.read();
+                    break;
+
+                case '\'':
+                case '"':
+                    if (ch == quote) {
+                        // Matching closing quote: consume it and finish.
+                        ch = in.read();
+                        return getString(0);
+                    }
+                    // A quote that is not the delimiter is ordinary data.
+                    /* fall through */
+
+                default:
+                    addString(ch & 0xFF);
+                    ch = in.read();
+                    break;
+            }
+        }
+    }
+
+    /**
+     * Parse an attribute default value.
+     * [147] 425:1
+     *
+     * <p>A {@code #}-prefixed keyword is mapped through
+     * {@code AttributeList.name2type} into {@code atts.modifier}. A value
+     * specification is read into {@code atts.value} only when that modifier
+     * is {@code FIXED} or when no keyword is present.
+     *
+     * @param atts the attribute definition to update
+     * @throws IOException if reading the input fails
+     */
+    void parseAttributeDefaultValue(AttributeList atts) throws IOException {
+        if (ch == '#') {
+            ch = in.read();
+            if (parseIdentifier(true)) {
+                skipParameterSpace();
+                atts.modifier = AttributeList.name2type(getString(0));
+                if (atts.modifier != FIXED) {
+                    // Only #FIXED is followed by a value.
+                    return;
+                }
+            } else {
+                error("invalid", "attribute value");
+                return;
+            }
+        }
+        atts.value = parseAttributeValueSpecification();
+        skipParameterSpace();
+    }
+
+    /**
+     * Parse an attribute definition list declaration.
+     * [141] 420:15
+     * REMIND: associated notation name
+     *
+     * <p>Reads the element name list followed by any number of attribute
+     * definitions (name, declared value, default value) and links them into
+     * a list. After the closing {@code >} that list is attached to every
+     * named element, unless {@code in.replace} is non-zero.
+     *
+     * @throws IOException if reading the input fails
+     */
+    void parseAttlistDeclaration() throws IOException {
+        // Typed as Vector<String>: the names are passed on to
+        // dtd.defineAttributes, which raw (Object) elements cannot satisfy.
+        Vector<String> elems = parseIdentifierList(true);
+        AttributeList attlist = null, atts = null;
+
+        while (parseIdentifier(true)) {
+            if (atts == null) {
+                // First definition becomes the head of the list.
+                attlist = atts = new AttributeList(getString(0));
+            } else {
+                atts.next = new AttributeList(getString(0));
+                atts = atts.next;
+            }
+            skipParameterSpace();
+            parseAttributeDeclaredValue(atts);
+            parseAttributeDefaultValue(atts);
+
+            // An IMPLIED attribute whose value list has exactly one entry
+            // takes that entry as its value.
+            if ((atts.modifier == IMPLIED) && (atts.values != null) && (atts.values.size() == 1)) {
+                atts.value = (String)atts.values.elementAt(0);
+            }
+        }
+
+        expect('>');
+
+        if (in.replace == 0) {
+            for (String name : elems) {
+                dtd.defineAttributes(name, attlist);
+            }
+        }
+    }
+
+    /**
+     * Parse an ignored section until ]]> is encountered.
+     *
+     * <p>Each nested {@code <![} raises the nesting depth, so only the
+     * {@code ]]>} matching the outermost section ends the scan.
+     * {@code in.replace} is incremented on entry and decremented again on
+     * both exits (matching close, or end of input, which is reported as an
+     * error).
+     *
+     * @throws IOException if reading the input fails
+     */
+    void parseIgnoredSection() throws IOException {
+        int depth = 1;
+        in.replace++;
+        while (true) {
+            switch (ch) {
+                case '<':
+                    // "<![" opens a nested marked section.
+                    if ((ch = in.read()) == '!') {
+                        if ((ch = in.read()) == '[') {
+                            ch = in.read();
+                            depth++;
+                        }
+                    }
+                    break;
+                case ']':
+                    // "]]>" closes one nesting level.
+                    if ((ch = in.read()) == ']') {
+                        if ((ch = in.read()) == '>') {
+                            ch = in.read();
+                            if (--depth == 0) {
+                                in.replace--;
+                                return;
+                            }
+                        }
+                    }
+                    break;
+                case -1:
+                    error("eof");
+                    in.replace--;
+                    return;
+
+                default:
+                    ch = in.read();
+                    break;
+            }
+        }
+    }
+
+    /**
+     * Parse a marked section declaration.
+     * [93] 391:13
+     * REMIND: deal with all status keywords
+     *
+     * <p>Reads the status keyword and the following {@code [}. An
+     * {@code ignore} section is skipped with {@code parseIgnoredSection()};
+     * any other keyword is parsed as a regular section that must end in
+     * {@code ]]>}, with an error reported first if the keyword is not
+     * {@code include}.
+     *
+     * @throws IOException if reading the input fails
+     */
+    void parseMarkedSectionDeclaration() throws IOException {
+        ch = in.read();
+        skipSpace();
+        if (!parseIdentifier(true)) {
+            error("expected", "section status keyword");
+            return;
+        }
+        String status = getString(0);
+        skipSpace();
+        expect('[');
+
+        if ("ignore".equals(status)) {
+            parseIgnoredSection();
+            return;
+        }
+
+        if (!"include".equals(status)) {
+            // Unknown keywords are reported but still treated like "include".
+            error("invalid.arg", "section status keyword", status);
+        }
+        parseSection();
+        expect(']');
+        expect(']');
+        expect('>');
+    }
+
+    /**
+     * Parse an external identifier
+     * [73] 379:1
+     *
+     * <p>Does nothing when no identifier is present. After {@code PUBLIC} a
+     * quoted public identifier is required; any keyword other than
+     * {@code PUBLIC} or {@code SYSTEM} is reported as invalid. A quoted
+     * literal that follows is read as well. The literals are parsed and
+     * their values discarded.
+     *
+     * @throws IOException if reading the input fails
+     */
+    void parseExternalIdentifier() throws IOException {
+        if (!parseIdentifier(false)) {
+            return;
+        }
+        String keyword = getString(0);
+        skipParameterSpace();
+
+        if (keyword.equals("PUBLIC")) {
+            boolean quoted = (ch == '\'') || (ch == '"');
+            if (quoted) {
+                parseAttributeValueSpecification();
+            } else {
+                error("expected", "public identifier");
+            }
+            skipParameterSpace();
+        } else if (!keyword.equals("SYSTEM")) {
+            error("invalid", "external identifier");
+        }
+
+        boolean quoted = (ch == '\'') || (ch == '"');
+        if (quoted) {
+            parseAttributeValueSpecification();
+        }
+        skipParameterSpace();
+    }
+
+    /**
+     * Parse document type declaration.
+     * [110] 403:1
+     *
+     * <p>Reads the document type name, an optional external identifier and
+     * an optional {@code [ ... ]} declaration subset, then expects the
+     * closing {@code >}.
+     *
+     * @throws IOException if reading the input fails
+     */
+    void parseDocumentTypeDeclaration() throws IOException {
+        skipParameterSpace();
+        if (parseIdentifier(true)) {
+            skipParameterSpace();
+        } else {
+            error("expected", "identifier");
+        }
+        // Drop whatever the identifier left in the string buffer.
+        strpos = 0;
+        parseExternalIdentifier();
+
+        if (ch == '[') {
+            ch = in.read();
+            parseSection();
+            expect(']');
+            skipParameterSpace();
+        }
+        expect('>');
+    }
+
+    /**
+     * Parse a section of the input up to EOF or ']'.
+     *
+     * <p>Whitespace is skipped, {@code <![} starts a marked section,
+     * {@code <!-} is handled as a comment declaration, and any other
+     * {@code <!} is dispatched on its keyword. Any other character is
+     * reported as invalid and skipped.
+     *
+     * @throws IOException if reading the input fails
+     */
+    @SuppressWarnings("fallthrough")
+    void parseSection() throws IOException {
+        while (true) {
+            switch (ch) {
+                case ']':
+                case -1:
+                    return;
+
+                case '<':
+                    ch = in.read();
+                    if (ch == '!') {
+                        ch = in.read();
+                        if (ch == '[') {
+                            parseMarkedSectionDeclaration();
+                        } else if (ch == '-') {
+                            skipParameterSpace();
+                            expect('>');
+                        } else {
+                            parseDeclarationKeyword();
+                        }
+                    }
+                    break;
+
+                default:
+                    error("invalid.arg", "character", "'" + (char)ch + "' / " + ch);
+                    /* fall through */
+
+                case ' ':
+                case '\t':
+                case '\n':
+                    ch = in.read();
+                    break;
+            }
+        }
+    }
+
+    /**
+     * Reads the keyword that follows {@code <!} and runs the matching
+     * declaration parser. {@code usemap}, {@code shortref} and
+     * {@code notation} declarations are reported and skipped.
+     */
+    private void parseDeclarationKeyword() throws IOException {
+        if (!parseIdentifier(true)) {
+            error("markup");
+            skipToDeclarationEnd();
+            return;
+        }
+
+        String keyword = getString(0);
+        if (keyword.equals("element")) {
+            parseElementDeclaration();
+        } else if (keyword.equals("entity")) {
+            parseEntityDeclaration();
+        } else if (keyword.equals("attlist")) {
+            parseAttlistDeclaration();
+        } else if (keyword.equals("doctype")) {
+            parseDocumentTypeDeclaration();
+        } else if (keyword.equals("usemap")
+                || keyword.equals("shortref")
+                || keyword.equals("notation")) {
+            error("ignoring", keyword);
+            skipToDeclarationEnd();
+        } else {
+            // Unknown keyword: reported only, nothing is skipped here.
+            error("markup");
+        }
+    }
+
+    /**
+     * Discards input up to the next {@code >} (or end of input) and then
+     * expects that {@code >}.
+     */
+    private void skipToDeclarationEnd() throws IOException {
+        while ((ch != -1) && (ch != '>')) {
+            ch = in.read();
+        }
+        expect('>');
+    }
+
+    /**
+     * Parse a DTD.
+     * @return the dtd or null if an error occurred.
+     * @param in  the stream the DTD text is read from
+     * @param dtd the builder that receives the parsed definitions and is
+     *            returned on success
+     */
+    DTD parse(InputStream in, DTDBuilder dtd) {
+        try {
+            this.dtd = dtd;
+            this.in = new DTDInputStream(in, dtd);
+
+            ch = this.in.read();
+            parseSection();
+
+            // parseSection also returns on a stray ']'; anything short of
+            // end of input at this point is an error.
+            if (ch != -1) {
+                error("premature");
+            }
+        } catch (IOException e) {
+            error("ioexception");
+        } catch (Exception e) {
+            error("exception", e.getClass().getName(), e.getMessage());
+            e.printStackTrace();
+        } catch (ThreadDeath e) {
+            error("terminated");
+            // ThreadDeath must not be swallowed: record the error, then
+            // rethrow so that the thread actually dies.
+            throw e;
+        }
+        // Any reported error invalidates the result.
+        return (nerrors > 0) ? null : dtd;
+    }
+}