--- old/make/jdk/src/classes/build/tools/dtdbuilder/DTDParser.java 2020-03-23 19:57:02.563962510 +0100
+++ /dev/null 2020-02-11 10:29:13.086348146 +0100
@@ -1,927 +0,0 @@
-/*
- * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package build.tools.dtdbuilder;
-
-import javax.swing.text.html.parser.*;
-import java.net.URL;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Enumeration;
-import java.util.Vector;
-import java.util.Hashtable;
-import java.util.BitSet;
-import java.text.MessageFormat;
-
-/**
- * A parser for DTDs. This parser roughly corresponds to the
- * rules specified in "The SGML Handbook" by Charles F. Goldfarb.
- * The end result of parsing the stream is a DTD object.
- *
- *
- * @see DTD
- * @see DTDInputStream
- * @author Arthur van Hoff
- */
-final
-class DTDParser implements DTDConstants {
- DTDBuilder dtd;
- DTDInputStream in;
- int ch;
- char str[] = new char[128];
- int strpos = 0;
- int nerrors = 0;
-
- /**
- * Report an error.
- */
- void error(String err, String arg1, String arg2, String arg3) {
- nerrors++;
-
- String msgParams[] = {arg1, arg2, arg3};
-
- String str = getSubstProp("dtderr." + err, msgParams);
- if (str == null) {
- str = err + "[" + arg1 + "," + arg2 + "," + arg3 + "]";
- }
- System.err.println("line " + in.ln + ", dtd " + dtd + ": " + str);
- }
- void error(String err, String arg1, String arg2) {
- error(err, arg1, arg2, "?");
- }
- void error(String err, String arg1) {
- error(err, arg1, "?", "?");
- }
- void error(String err) {
- error(err, "?", "?", "?");
- }
-
- private String getSubstProp(String propName, String args[]) {
- String prop = System.getProperty(propName);
-
- if (prop == null) {
- return null;
- }
-
- return MessageFormat.format(prop, (Object[])args);
- }
-
- /**
- * Expect a character.
- */
- boolean expect(int c) throws IOException {
- if (ch != c) {
- char str[] = {(char)c};
- error("expected", "'" + new String(str) + "'");
- return false;
- }
- ch = in.read();
- return true;
- }
-
- /**
- * Add a char to the string buffer.
- */
- void addString(int c) {
- if (strpos == str.length) {
- char newstr[] = new char[str.length * 2];
- System.arraycopy(str, 0, newstr, 0, str.length);
- str = newstr;
- }
- str[strpos++] = (char)c;
- }
-
- /**
- * Get the string which was accumulated in the buffer.
- * Pos is the starting position of the string.
- */
- String getString(int pos) {
- char newstr[] = new char[strpos - pos];
- System.arraycopy(str, pos, newstr, 0, strpos - pos);
- strpos = pos;
- return new String(newstr);
- }
-
- /**
- * Get the chars which were accumulated in the buffer.
- * Pos is the starting position of the string.
- */
- char[] getChars(int pos) {
- char newstr[] = new char[strpos - pos];
- System.arraycopy(str, pos, newstr, 0, strpos - pos);
- strpos = pos;
- return newstr;
- }
-
- /**
- * Skip spaces. [5] 297:23
- */
- void skipSpace() throws IOException {
- while (true) {
- switch (ch) {
- case '\n':
- case ' ':
- case '\t':
- ch = in.read();
- break;
-
- default:
- return;
- }
- }
- }
-
- /**
- * Skip tag spaces (includes comments). [65] 372:1
- */
- void skipParameterSpace() throws IOException {
- while (true) {
- switch (ch) {
- case '\n':
- case ' ':
- case '\t':
- ch = in.read();
- break;
- case '-':
- if ((ch = in.read()) != '-') {
- in.push(ch);
- ch = '-';
- return;
- }
-
- in.replace++;
- while (true) {
- switch (ch = in.read()) {
- case '-':
- if ((ch = in.read()) == '-') {
- ch = in.read();
- in.replace--;
- skipParameterSpace();
- return;
- }
- break;
-
- case -1:
- error("eof.arg", "comment");
- in.replace--;
- return;
- }
- }
- default:
- return;
- }
- }
- }
-
- /**
- * Parse identifier. Uppercase characters are automatically
- * folded to lowercase. Returns falsed if no identifier is found.
- */
- @SuppressWarnings("fallthrough")
- boolean parseIdentifier(boolean lower) throws IOException {
- switch (ch) {
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- if (lower) {
- ch = 'a' + (ch - 'A');
- }
- /* fall through */
-
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- break;
-
- default:
- return false;
- }
-
- addString(ch);
- ch = in.read();
- parseNameToken(lower);
- return true;
- }
-
- /**
- * Parses name token. If lower
is true, upper case letters
- * are folded to lower case. Returns falsed if no token is found.
- */
- @SuppressWarnings("fallthrough")
- boolean parseNameToken(boolean lower) throws IOException {
- boolean first = true;
-
- while (true) {
- switch (ch) {
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- if (lower) {
- ch = 'a' + (ch - 'A');
- }
- /* fall through */
-
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
-
- case '.': case '-':
- addString(ch);
- ch = in.read();
- first = false;
- break;
-
- default:
- return !first;
- }
- }
- }
-
- /**
- * Parse a list of identifiers.
- */
- Vector parseIdentifierList(boolean lower) throws IOException {
- Vector elems = new Vector<>();
- skipSpace();
- switch (ch) {
- case '(':
- ch = in.read();
- skipParameterSpace();
- while (parseNameToken(lower)) {
- elems.addElement(getString(0));
- skipParameterSpace();
- if (ch == '|') {
- ch = in.read();
- skipParameterSpace();
- }
- }
- expect(')');
- skipParameterSpace();
- break;
-
- default:
- if (!parseIdentifier(lower)) {
- error("expected", "identifier");
- break;
- }
- elems.addElement(getString(0));
- skipParameterSpace();
- break;
- }
- return elems;
- }
-
- /**
- * Parse and Entity reference. Should be called when
- * a & is encountered. The data is put in the string buffer.
- * [59] 350:17
- */
- private void parseEntityReference() throws IOException {
- int pos = strpos;
-
- if ((ch = in.read()) == '#') {
- int n = 0;
- ch = in.read();
- if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z'))) {
- addString('#');
- } else {
- while ((ch >= '0') && (ch <= '9')) {
- n = (n * 10) + ch - '0';
- ch = in.read();
- }
- if ((ch == ';') || (ch == '\n')) {
- ch = in.read();
- }
- addString(n);
- return;
- }
- }
-
- while (true) {
- switch (ch) {
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
-
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
-
- case '.': case '-':
- addString(ch);
- ch = in.read();
- break;
-
- default:
- if (strpos == pos) {
- addString('&');
- return;
- }
- String nm = getString(pos);
- Entity ent = dtd.getEntity(nm);
- if (ent == null) {
- error("undef.entref" + nm);
- return;
- }
- if ((ch == ';') || (ch == '\n')) {
- ch = in.read();
- }
- char data[] = ent.getData();
- for (int i = 0 ; i < data.length ; i++) {
- addString(data[i]);
- }
- return;
- }
- }
- }
-
- /**
- * Parse an entity declaration.
- * [101] 394:18
- * REMIND: external entity type
- */
- private void parseEntityDeclaration() throws IOException {
- int type = GENERAL;
-
- skipSpace();
- if (ch == '%') {
- ch = in.read();
- type = PARAMETER;
- skipSpace();
- }
- if (ch == '#') {
- addString('#');
- ch = in.read();
- }
- if (!parseIdentifier(false)) {
- error("expected", "identifier");
- return;
- }
- String nm = getString(0);
- skipParameterSpace();
- if (parseIdentifier(false)) {
- String tnm = getString(0);
- int t = Entity.name2type(tnm);
- if (t == 0) {
- error("invalid.arg", "entity type", tnm);
- } else {
- type |= t;
- }
- skipParameterSpace();
- }
-
- if ((ch != '"') && (ch != '\'')) {
- error("expected", "entity value");
- skipParameterSpace();
- if (ch == '>') {
- ch = in.read();
- }
- return;
- }
-
- int term = ch;
- ch = in.read();
- while ((ch != -1) && (ch != term)) {
- if (ch == '&') {
- parseEntityReference();
- } else {
- addString(ch & 0xFF);
- ch = in.read();
- }
- }
- if (ch == term) {
- ch = in.read();
- }
- if (in.replace == 0) {
- char data[] = getChars(0);
- dtd.defineEntity(nm, type, data);
- } else {
- strpos = 0;
- }
- skipParameterSpace();
- expect('>');
- }
-
- /**
- * Parse content model.
- * [126] 410:1
- * REMIND: data tag group
- */
- ContentModel parseContentModel() throws IOException {
- ContentModel m = null;
-
- switch (ch) {
- case '(':
- ch = in.read();
- skipParameterSpace();
- ContentModel e = parseContentModel();
-
- if (ch != ')') {
- m = new ContentModel(ch, e);
- do {
- ch = in.read();
- skipParameterSpace();
- e.next = parseContentModel();
- if (e.next.type == m.type) {
- e.next = (ContentModel)e.next.content;
- }
- for (; e.next != null ; e = e.next);
- } while (ch == m.type);
- } else {
- m = new ContentModel(',', e);
- }
- expect(')');
- break;
-
- case '#':
- ch = in.read();
- if (parseIdentifier(true)) {
- m = new ContentModel('*', new ContentModel(dtd.getElement("#" + getString(0))));
- } else {
- error("invalid", "content model");
- }
- break;
-
- default:
- if (parseIdentifier(true)) {
- m = new ContentModel(dtd.getElement(getString(0)));
- } else {
- error("invalid", "content model");
- }
- break;
- }
-
- switch (ch) {
- case '?':
- case '*':
- case '+':
- m = new ContentModel(ch, m);
- ch = in.read();
- break;
- }
- skipParameterSpace();
-
- return m;
- }
-
- /**
- * Parse element declaration.
- * [116] 405:6
- */
- void parseElementDeclaration() throws IOException {
- Vector elems = parseIdentifierList(true);
- BitSet inclusions = null;
- BitSet exclusions = null;
- boolean omitStart = false;
- boolean omitEnd = false;
-
- if ((ch == '-') || (ch == 'O')) {
- omitStart = ch == 'O';
- ch = in.read();
- skipParameterSpace();
-
- if ((ch == '-') || (ch == 'O')) {
- omitEnd = ch == 'O';
- ch = in.read();
- skipParameterSpace();
- } else {
- expect('-');
- }
- }
-
- int type = MODEL;
- ContentModel content = null;
- if (parseIdentifier(false)) {
- String nm = getString(0);
- type = Element.name2type(nm);
- if (type == 0) {
- error("invalid.arg", "content type", nm);
- type = EMPTY;
- }
- skipParameterSpace();
- } else {
- content = parseContentModel();
- }
-
- if ((type == MODEL) || (type == ANY)) {
- if (ch == '-') {
- ch = in.read();
- Vector v = parseIdentifierList(true);
- exclusions = new BitSet();
- for (Enumeration e = v.elements() ; e.hasMoreElements() ;) {
- exclusions.set(dtd.getElement(e.nextElement()).getIndex());
- }
- }
- if (ch == '+') {
- ch = in.read();
- Vector v = parseIdentifierList(true);
- inclusions = new BitSet();
- for (Enumeration e = v.elements() ; e.hasMoreElements() ;) {
- inclusions.set(dtd.getElement(e.nextElement()).getIndex());
- }
- }
- }
- expect('>');
-
- if (in.replace == 0) {
- for (Enumeration e = elems.elements() ; e.hasMoreElements() ;) {
- dtd.defineElement(e.nextElement(), type, omitStart, omitEnd, content, exclusions, inclusions, null);
- }
- }
- }
-
- /**
- * Parse an attribute declared value.
- * [145] 422:6
- */
- void parseAttributeDeclaredValue(AttributeList atts) throws IOException {
- if (ch == '(') {
- atts.values = parseIdentifierList(true);
- atts.type = NMTOKEN;
- return;
- }
- if (!parseIdentifier(false)) {
- error("invalid", "attribute value");
- return;
- }
- atts.type = AttributeList.name2type(getString(0));
- skipParameterSpace();
- if (atts.type == NOTATION) {
- atts.values = parseIdentifierList(true);
- }
- }
-
- /**
- * Parse an attribute value specification.
- * [33] 331:1
- */
- @SuppressWarnings("fallthrough")
- String parseAttributeValueSpecification() throws IOException {
- int delim = -1;
- switch (ch) {
- case '\'':
- case '"':
- delim = ch;
- ch = in.read();
- }
- while (true) {
- switch (ch) {
- case -1:
- error("eof.arg", "attribute value");
- return getString(0);
-
- case '&':
- parseEntityReference();
- break;
-
- case ' ':
- case '\t':
- case '\n':
- if (delim == -1) {
- return getString(0);
- }
- addString(' ');
- ch = in.read();
- break;
-
- case '\'':
- case '"':
- if (delim == ch) {
- ch = in.read();
- return getString(0);
- }
- /* fall through */
-
- default:
- addString(ch & 0xFF);
- ch = in.read();
- break;
- }
- }
- }
-
- /**
- * Parse an attribute default value.
- * [147] 425:1
- */
- void parseAttributeDefaultValue(AttributeList atts) throws IOException {
- if (ch == '#') {
- ch = in.read();
- if (!parseIdentifier(true)) {
- error("invalid", "attribute value");
- return;
- }
- skipParameterSpace();
- atts.modifier = AttributeList.name2type(getString(0));
- if (atts.modifier != FIXED) {
- return;
- }
- }
- atts.value = parseAttributeValueSpecification();
- skipParameterSpace();
- }
-
- /**
- * Parse an attribute definition list declaration.
- * [141] 420:15
- * REMIND: associated notation name
- */
- void parseAttlistDeclaration() throws IOException {
- Vector elems = parseIdentifierList(true);
- AttributeList attlist = null, atts = null;
-
- while (parseIdentifier(true)) {
- if (atts == null) {
- attlist = atts = new AttributeList(getString(0));
- } else {
- atts.next = new AttributeList(getString(0));
- atts = atts.next;
- }
- skipParameterSpace();
- parseAttributeDeclaredValue(atts);
- parseAttributeDefaultValue(atts);
-
- if ((atts.modifier == IMPLIED) && (atts.values != null) && (atts.values.size() == 1)) {
- atts.value = (String)atts.values.elementAt(0);
- }
- }
-
- expect('>');
-
- if (in.replace == 0) {
- for (Enumeration e = elems.elements() ; e.hasMoreElements() ;) {
- dtd.defineAttributes(e.nextElement(), attlist);
- }
- }
- }
-
- /**
- * Parse an ignored section until ]]> is encountered.
- */
- void parseIgnoredSection() throws IOException {
- int depth = 1;
- in.replace++;
- while (true) {
- switch (ch) {
- case '<':
- if ((ch = in.read()) == '!') {
- if ((ch = in.read()) == '[') {
- ch = in.read();
- depth++;
- }
- }
- break;
- case ']':
- if ((ch = in.read()) == ']') {
- if ((ch = in.read()) == '>') {
- ch = in.read();
- if (--depth == 0) {
- in.replace--;
- return;
- }
- }
- }
- break;
- case -1:
- error("eof");
- in.replace--;
- return;
-
- default:
- ch = in.read();
- break;
- }
- }
- }
-
- /**
- * Parse a marked section declaration.
- * [93] 391:13
- * REMIND: deal with all status keywords
- */
- void parseMarkedSectionDeclaration() throws IOException {
- ch = in.read();
- skipSpace();
- if (!parseIdentifier(true)) {
- error("expected", "section status keyword");
- return;
- }
- String str = getString(0);
- skipSpace();
- expect('[');
- if ("ignore".equals(str)) {
- parseIgnoredSection();
- } else {
- if (!"include".equals(str)) {
- error("invalid.arg", "section status keyword", str);
- }
- parseSection();
- expect(']');
- expect(']');
- expect('>');
- }
- }
-
- /**
- * Parse an external identifier
- * [73] 379:1
- */
- void parseExternalIdentifier() throws IOException {
- if (parseIdentifier(false)) {
- String id = getString(0);
- skipParameterSpace();
-
- if (id.equals("PUBLIC")) {
- if ((ch == '\'') || (ch == '"')) {
- parseAttributeValueSpecification();
- } else {
- error("expected", "public identifier");
- }
- skipParameterSpace();
- } else if (!id.equals("SYSTEM")) {
- error("invalid", "external identifier");
- }
- if ((ch == '\'') || (ch == '"')) {
- parseAttributeValueSpecification();
- }
- skipParameterSpace();
- }
- }
-
- /**
- * Parse document type declaration.
- * [110] 403:1
- */
- void parseDocumentTypeDeclaration() throws IOException {
- skipParameterSpace();
- if (!parseIdentifier(true)) {
- error("expected", "identifier");
- } else {
- skipParameterSpace();
- }
- strpos = 0;
- parseExternalIdentifier();
-
- if (ch == '[') {
- ch = in.read();
- parseSection();
- expect(']');
- skipParameterSpace();
- }
- expect('>');
- }
-
- /**
- * Parse a section of the input upto EOF or ']'.
- */
- @SuppressWarnings("fallthrough")
- void parseSection() throws IOException {
- while (true) {
- switch (ch) {
- case ']':
- return;
-
- case '<':
- switch (ch = in.read()) {
- case '!':
- switch (ch = in.read()) {
- case '[':
- parseMarkedSectionDeclaration();
- break;
-
- case '-':
- skipParameterSpace();
- expect('>');
- break;
-
- default:
- if (parseIdentifier(true)) {
- String str = getString(0);
-
- if (str.equals("element")) {
- parseElementDeclaration();
-
- } else if (str.equals("entity")) {
- parseEntityDeclaration();
-
- } else if (str.equals("attlist")) {
- parseAttlistDeclaration();
-
- } else if (str.equals("doctype")) {
- parseDocumentTypeDeclaration();
-
- } else if (str.equals("usemap")) {
- error("ignoring", "usemap");
- while ((ch != -1) && (ch != '>')) {
- ch = in.read();
- }
- expect('>');
- } else if (str.equals("shortref")) {
- error("ignoring", "shortref");
- while ((ch != -1) && (ch != '>')) {
- ch = in.read();
- }
- expect('>');
- } else if (str.equals("notation")) {
- error("ignoring", "notation");
- while ((ch != -1) && (ch != '>')) {
- ch = in.read();
- }
- expect('>');
- } else {
- error("markup");
- }
- } else {
- error("markup");
- while ((ch != -1) && (ch != '>')) {
- ch = in.read();
- }
- expect('>');
- }
- }
- }
- break;
-
- case -1:
- return;
-
- default:
- char str[] = {(char)ch};
- error("invalid.arg", "character", "'" + new String(str) + "' / " + ch);
- /* fall through */
-
- case ' ':
- case '\t':
- case '\n':
- ch = in.read();
- break;
- }
- }
- }
-
- /**
- * Parse a DTD.
- * @return the dtd or null if an error occurred.
- */
- DTD parse(InputStream in, DTDBuilder dtd) {
- try {
- this.dtd = dtd;
- this.in = new DTDInputStream(in, dtd);
-
- ch = this.in.read();
- parseSection();
-
- if (ch != -1) {
- error("premature");
- }
- } catch (IOException e) {
- error("ioexception");
- } catch (Exception e) {
- error("exception", e.getClass().getName(), e.getMessage());
- e.printStackTrace();
- } catch (ThreadDeath e) {
- error("terminated");
- }
- return (nerrors > 0) ? null : dtd;
- }
-}
--- /dev/null 2020-02-11 10:29:13.086348146 +0100
+++ new/src/java.desktop/share/tools/org/openjdk/buildtools/dtdbuilder/DTDParser.java 2020-03-23 19:57:02.195962513 +0100
@@ -0,0 +1,927 @@
+/*
+ * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package org.openjdk.buildtools.dtdbuilder;
+
+import javax.swing.text.html.parser.*;
+import java.net.URL;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Enumeration;
+import java.util.Vector;
+import java.util.Hashtable;
+import java.util.BitSet;
+import java.text.MessageFormat;
+
+/**
+ * A parser for DTDs. This parser roughly corresponds to the
+ * rules specified in "The SGML Handbook" by Charles F. Goldfarb.
+ * The end result of parsing the stream is a DTD object.
+ *
+ *
+ * @see DTD
+ * @see DTDInputStream
+ * @author Arthur van Hoff
+ */
+final
+class DTDParser implements DTDConstants {
+ DTDBuilder dtd;
+ DTDInputStream in;
+ int ch;
+ char str[] = new char[128];
+ int strpos = 0;
+ int nerrors = 0;
+
+ /**
+ * Report an error.
+ */
+ void error(String err, String arg1, String arg2, String arg3) {
+ nerrors++;
+
+ String msgParams[] = {arg1, arg2, arg3};
+
+ String str = getSubstProp("dtderr." + err, msgParams);
+ if (str == null) {
+ str = err + "[" + arg1 + "," + arg2 + "," + arg3 + "]";
+ }
+ System.err.println("line " + in.ln + ", dtd " + dtd + ": " + str);
+ }
+ void error(String err, String arg1, String arg2) {
+ error(err, arg1, arg2, "?");
+ }
+ void error(String err, String arg1) {
+ error(err, arg1, "?", "?");
+ }
+ void error(String err) {
+ error(err, "?", "?", "?");
+ }
+
+ private String getSubstProp(String propName, String args[]) {
+ String prop = System.getProperty(propName);
+
+ if (prop == null) {
+ return null;
+ }
+
+ return MessageFormat.format(prop, (Object[])args);
+ }
+
+ /**
+ * Expect a character.
+ */
+ boolean expect(int c) throws IOException {
+ if (ch != c) {
+ char str[] = {(char)c};
+ error("expected", "'" + new String(str) + "'");
+ return false;
+ }
+ ch = in.read();
+ return true;
+ }
+
+ /**
+ * Add a char to the string buffer.
+ */
+ void addString(int c) {
+ if (strpos == str.length) {
+ char newstr[] = new char[str.length * 2];
+ System.arraycopy(str, 0, newstr, 0, str.length);
+ str = newstr;
+ }
+ str[strpos++] = (char)c;
+ }
+
+ /**
+ * Get the string which was accumulated in the buffer.
+ * Pos is the starting position of the string.
+ */
+ String getString(int pos) {
+ char newstr[] = new char[strpos - pos];
+ System.arraycopy(str, pos, newstr, 0, strpos - pos);
+ strpos = pos;
+ return new String(newstr);
+ }
+
+ /**
+ * Get the chars which were accumulated in the buffer.
+ * Pos is the starting position of the string.
+ */
+ char[] getChars(int pos) {
+ char newstr[] = new char[strpos - pos];
+ System.arraycopy(str, pos, newstr, 0, strpos - pos);
+ strpos = pos;
+ return newstr;
+ }
+
+ /**
+ * Skip spaces. [5] 297:23
+ */
+ void skipSpace() throws IOException {
+ while (true) {
+ switch (ch) {
+ case '\n':
+ case ' ':
+ case '\t':
+ ch = in.read();
+ break;
+
+ default:
+ return;
+ }
+ }
+ }
+
+ /**
+ * Skip tag spaces (includes comments). [65] 372:1
+ */
+ void skipParameterSpace() throws IOException {
+ while (true) {
+ switch (ch) {
+ case '\n':
+ case ' ':
+ case '\t':
+ ch = in.read();
+ break;
+ case '-':
+ if ((ch = in.read()) != '-') {
+ in.push(ch);
+ ch = '-';
+ return;
+ }
+
+ in.replace++;
+ while (true) {
+ switch (ch = in.read()) {
+ case '-':
+ if ((ch = in.read()) == '-') {
+ ch = in.read();
+ in.replace--;
+ skipParameterSpace();
+ return;
+ }
+ break;
+
+ case -1:
+ error("eof.arg", "comment");
+ in.replace--;
+ return;
+ }
+ }
+ default:
+ return;
+ }
+ }
+ }
+
+ /**
+ * Parse identifier. Uppercase characters are automatically
+ * folded to lowercase. Returns falsed if no identifier is found.
+ */
+ @SuppressWarnings("fallthrough")
+ boolean parseIdentifier(boolean lower) throws IOException {
+ switch (ch) {
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ if (lower) {
+ ch = 'a' + (ch - 'A');
+ }
+ /* fall through */
+
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+ break;
+
+ default:
+ return false;
+ }
+
+ addString(ch);
+ ch = in.read();
+ parseNameToken(lower);
+ return true;
+ }
+
+ /**
+ * Parses name token. If lower
is true, upper case letters
+ * are folded to lower case. Returns falsed if no token is found.
+ */
+ @SuppressWarnings("fallthrough")
+ boolean parseNameToken(boolean lower) throws IOException {
+ boolean first = true;
+
+ while (true) {
+ switch (ch) {
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ if (lower) {
+ ch = 'a' + (ch - 'A');
+ }
+ /* fall through */
+
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+
+ case '.': case '-':
+ addString(ch);
+ ch = in.read();
+ first = false;
+ break;
+
+ default:
+ return !first;
+ }
+ }
+ }
+
+ /**
+ * Parse a list of identifiers.
+ */
+ Vector parseIdentifierList(boolean lower) throws IOException {
+ Vector elems = new Vector<>();
+ skipSpace();
+ switch (ch) {
+ case '(':
+ ch = in.read();
+ skipParameterSpace();
+ while (parseNameToken(lower)) {
+ elems.addElement(getString(0));
+ skipParameterSpace();
+ if (ch == '|') {
+ ch = in.read();
+ skipParameterSpace();
+ }
+ }
+ expect(')');
+ skipParameterSpace();
+ break;
+
+ default:
+ if (!parseIdentifier(lower)) {
+ error("expected", "identifier");
+ break;
+ }
+ elems.addElement(getString(0));
+ skipParameterSpace();
+ break;
+ }
+ return elems;
+ }
+
+ /**
+ * Parse and Entity reference. Should be called when
+ * a & is encountered. The data is put in the string buffer.
+ * [59] 350:17
+ */
+ private void parseEntityReference() throws IOException {
+ int pos = strpos;
+
+ if ((ch = in.read()) == '#') {
+ int n = 0;
+ ch = in.read();
+ if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z'))) {
+ addString('#');
+ } else {
+ while ((ch >= '0') && (ch <= '9')) {
+ n = (n * 10) + ch - '0';
+ ch = in.read();
+ }
+ if ((ch == ';') || (ch == '\n')) {
+ ch = in.read();
+ }
+ addString(n);
+ return;
+ }
+ }
+
+ while (true) {
+ switch (ch) {
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+
+ case '.': case '-':
+ addString(ch);
+ ch = in.read();
+ break;
+
+ default:
+ if (strpos == pos) {
+ addString('&');
+ return;
+ }
+ String nm = getString(pos);
+ Entity ent = dtd.getEntity(nm);
+ if (ent == null) {
+ error("undef.entref" + nm);
+ return;
+ }
+ if ((ch == ';') || (ch == '\n')) {
+ ch = in.read();
+ }
+ char data[] = ent.getData();
+ for (int i = 0 ; i < data.length ; i++) {
+ addString(data[i]);
+ }
+ return;
+ }
+ }
+ }
+
+ /**
+ * Parse an entity declaration.
+ * [101] 394:18
+ * REMIND: external entity type
+ */
+ private void parseEntityDeclaration() throws IOException {
+ int type = GENERAL;
+
+ skipSpace();
+ if (ch == '%') {
+ ch = in.read();
+ type = PARAMETER;
+ skipSpace();
+ }
+ if (ch == '#') {
+ addString('#');
+ ch = in.read();
+ }
+ if (!parseIdentifier(false)) {
+ error("expected", "identifier");
+ return;
+ }
+ String nm = getString(0);
+ skipParameterSpace();
+ if (parseIdentifier(false)) {
+ String tnm = getString(0);
+ int t = Entity.name2type(tnm);
+ if (t == 0) {
+ error("invalid.arg", "entity type", tnm);
+ } else {
+ type |= t;
+ }
+ skipParameterSpace();
+ }
+
+ if ((ch != '"') && (ch != '\'')) {
+ error("expected", "entity value");
+ skipParameterSpace();
+ if (ch == '>') {
+ ch = in.read();
+ }
+ return;
+ }
+
+ int term = ch;
+ ch = in.read();
+ while ((ch != -1) && (ch != term)) {
+ if (ch == '&') {
+ parseEntityReference();
+ } else {
+ addString(ch & 0xFF);
+ ch = in.read();
+ }
+ }
+ if (ch == term) {
+ ch = in.read();
+ }
+ if (in.replace == 0) {
+ char data[] = getChars(0);
+ dtd.defineEntity(nm, type, data);
+ } else {
+ strpos = 0;
+ }
+ skipParameterSpace();
+ expect('>');
+ }
+
+ /**
+ * Parse content model.
+ * [126] 410:1
+ * REMIND: data tag group
+ */
+ ContentModel parseContentModel() throws IOException {
+ ContentModel m = null;
+
+ switch (ch) {
+ case '(':
+ ch = in.read();
+ skipParameterSpace();
+ ContentModel e = parseContentModel();
+
+ if (ch != ')') {
+ m = new ContentModel(ch, e);
+ do {
+ ch = in.read();
+ skipParameterSpace();
+ e.next = parseContentModel();
+ if (e.next.type == m.type) {
+ e.next = (ContentModel)e.next.content;
+ }
+ for (; e.next != null ; e = e.next);
+ } while (ch == m.type);
+ } else {
+ m = new ContentModel(',', e);
+ }
+ expect(')');
+ break;
+
+ case '#':
+ ch = in.read();
+ if (parseIdentifier(true)) {
+ m = new ContentModel('*', new ContentModel(dtd.getElement("#" + getString(0))));
+ } else {
+ error("invalid", "content model");
+ }
+ break;
+
+ default:
+ if (parseIdentifier(true)) {
+ m = new ContentModel(dtd.getElement(getString(0)));
+ } else {
+ error("invalid", "content model");
+ }
+ break;
+ }
+
+ switch (ch) {
+ case '?':
+ case '*':
+ case '+':
+ m = new ContentModel(ch, m);
+ ch = in.read();
+ break;
+ }
+ skipParameterSpace();
+
+ return m;
+ }
+
+ /**
+ * Parse element declaration.
+ * [116] 405:6
+ */
+ void parseElementDeclaration() throws IOException {
+ Vector elems = parseIdentifierList(true);
+ BitSet inclusions = null;
+ BitSet exclusions = null;
+ boolean omitStart = false;
+ boolean omitEnd = false;
+
+ if ((ch == '-') || (ch == 'O')) {
+ omitStart = ch == 'O';
+ ch = in.read();
+ skipParameterSpace();
+
+ if ((ch == '-') || (ch == 'O')) {
+ omitEnd = ch == 'O';
+ ch = in.read();
+ skipParameterSpace();
+ } else {
+ expect('-');
+ }
+ }
+
+ int type = MODEL;
+ ContentModel content = null;
+ if (parseIdentifier(false)) {
+ String nm = getString(0);
+ type = Element.name2type(nm);
+ if (type == 0) {
+ error("invalid.arg", "content type", nm);
+ type = EMPTY;
+ }
+ skipParameterSpace();
+ } else {
+ content = parseContentModel();
+ }
+
+ if ((type == MODEL) || (type == ANY)) {
+ if (ch == '-') {
+ ch = in.read();
+ Vector v = parseIdentifierList(true);
+ exclusions = new BitSet();
+ for (Enumeration e = v.elements() ; e.hasMoreElements() ;) {
+ exclusions.set(dtd.getElement(e.nextElement()).getIndex());
+ }
+ }
+ if (ch == '+') {
+ ch = in.read();
+ Vector v = parseIdentifierList(true);
+ inclusions = new BitSet();
+ for (Enumeration e = v.elements() ; e.hasMoreElements() ;) {
+ inclusions.set(dtd.getElement(e.nextElement()).getIndex());
+ }
+ }
+ }
+ expect('>');
+
+ if (in.replace == 0) {
+ for (Enumeration e = elems.elements() ; e.hasMoreElements() ;) {
+ dtd.defineElement(e.nextElement(), type, omitStart, omitEnd, content, exclusions, inclusions, null);
+ }
+ }
+ }
+
+ /**
+ * Parse an attribute declared value.
+ * [145] 422:6
+ */
+ void parseAttributeDeclaredValue(AttributeList atts) throws IOException {
+ if (ch == '(') {
+ atts.values = parseIdentifierList(true);
+ atts.type = NMTOKEN;
+ return;
+ }
+ if (!parseIdentifier(false)) {
+ error("invalid", "attribute value");
+ return;
+ }
+ atts.type = AttributeList.name2type(getString(0));
+ skipParameterSpace();
+ if (atts.type == NOTATION) {
+ atts.values = parseIdentifierList(true);
+ }
+ }
+
+ /**
+ * Parse an attribute value specification.
+ * [33] 331:1
+ */
+ @SuppressWarnings("fallthrough")
+ String parseAttributeValueSpecification() throws IOException {
+ int delim = -1;
+ switch (ch) {
+ case '\'':
+ case '"':
+ delim = ch;
+ ch = in.read();
+ }
+ while (true) {
+ switch (ch) {
+ case -1:
+ error("eof.arg", "attribute value");
+ return getString(0);
+
+ case '&':
+ parseEntityReference();
+ break;
+
+ case ' ':
+ case '\t':
+ case '\n':
+ if (delim == -1) {
+ return getString(0);
+ }
+ addString(' ');
+ ch = in.read();
+ break;
+
+ case '\'':
+ case '"':
+ if (delim == ch) {
+ ch = in.read();
+ return getString(0);
+ }
+ /* fall through */
+
+ default:
+ addString(ch & 0xFF);
+ ch = in.read();
+ break;
+ }
+ }
+ }
+
+ /**
+ * Parse an attribute default value.
+ * [147] 425:1
+ */
+ void parseAttributeDefaultValue(AttributeList atts) throws IOException {
+ if (ch == '#') {
+ ch = in.read();
+ if (!parseIdentifier(true)) {
+ error("invalid", "attribute value");
+ return;
+ }
+ skipParameterSpace();
+ atts.modifier = AttributeList.name2type(getString(0));
+ if (atts.modifier != FIXED) {
+ return;
+ }
+ }
+ atts.value = parseAttributeValueSpecification();
+ skipParameterSpace();
+ }
+
+ /**
+ * Parse an attribute definition list declaration.
+ * [141] 420:15
+ * REMIND: associated notation name
+ */
+ void parseAttlistDeclaration() throws IOException {
+ Vector elems = parseIdentifierList(true);
+ AttributeList attlist = null, atts = null;
+
+ while (parseIdentifier(true)) {
+ if (atts == null) {
+ attlist = atts = new AttributeList(getString(0));
+ } else {
+ atts.next = new AttributeList(getString(0));
+ atts = atts.next;
+ }
+ skipParameterSpace();
+ parseAttributeDeclaredValue(atts);
+ parseAttributeDefaultValue(atts);
+
+ if ((atts.modifier == IMPLIED) && (atts.values != null) && (atts.values.size() == 1)) {
+ atts.value = (String)atts.values.elementAt(0);
+ }
+ }
+
+ expect('>');
+
+ if (in.replace == 0) {
+ for (Enumeration e = elems.elements() ; e.hasMoreElements() ;) {
+ dtd.defineAttributes(e.nextElement(), attlist);
+ }
+ }
+ }
+
+ /**
+ * Parse an ignored section until ]]> is encountered.
+ */
+ void parseIgnoredSection() throws IOException {
+ int depth = 1;
+ in.replace++;
+ while (true) {
+ switch (ch) {
+ case '<':
+ if ((ch = in.read()) == '!') {
+ if ((ch = in.read()) == '[') {
+ ch = in.read();
+ depth++;
+ }
+ }
+ break;
+ case ']':
+ if ((ch = in.read()) == ']') {
+ if ((ch = in.read()) == '>') {
+ ch = in.read();
+ if (--depth == 0) {
+ in.replace--;
+ return;
+ }
+ }
+ }
+ break;
+ case -1:
+ error("eof");
+ in.replace--;
+ return;
+
+ default:
+ ch = in.read();
+ break;
+ }
+ }
+ }
+
+ /**
+ * Parse a marked section declaration.
+ * [93] 391:13
+ * REMIND: deal with all status keywords
+ */
+ void parseMarkedSectionDeclaration() throws IOException {
+ ch = in.read();
+ skipSpace();
+ if (!parseIdentifier(true)) {
+ error("expected", "section status keyword");
+ return;
+ }
+ String str = getString(0);
+ skipSpace();
+ expect('[');
+ if ("ignore".equals(str)) {
+ parseIgnoredSection();
+ } else {
+ if (!"include".equals(str)) {
+ error("invalid.arg", "section status keyword", str);
+ }
+ parseSection();
+ expect(']');
+ expect(']');
+ expect('>');
+ }
+ }
+
+ /**
+ * Parse an external identifier
+ * [73] 379:1
+ */
+ void parseExternalIdentifier() throws IOException {
+ if (parseIdentifier(false)) {
+ String id = getString(0);
+ skipParameterSpace();
+
+ if (id.equals("PUBLIC")) {
+ if ((ch == '\'') || (ch == '"')) {
+ parseAttributeValueSpecification();
+ } else {
+ error("expected", "public identifier");
+ }
+ skipParameterSpace();
+ } else if (!id.equals("SYSTEM")) {
+ error("invalid", "external identifier");
+ }
+ if ((ch == '\'') || (ch == '"')) {
+ parseAttributeValueSpecification();
+ }
+ skipParameterSpace();
+ }
+ }
+
+ /**
+ * Parse document type declaration.
+ * [110] 403:1
+ */
+ void parseDocumentTypeDeclaration() throws IOException {
+ skipParameterSpace();
+ if (!parseIdentifier(true)) {
+ error("expected", "identifier");
+ } else {
+ skipParameterSpace();
+ }
+ strpos = 0;
+ parseExternalIdentifier();
+
+ if (ch == '[') {
+ ch = in.read();
+ parseSection();
+ expect(']');
+ skipParameterSpace();
+ }
+ expect('>');
+ }
+
+ /**
+ * Parse a section of the input upto EOF or ']'.
+ */
+ @SuppressWarnings("fallthrough")
+ void parseSection() throws IOException {
+ while (true) {
+ switch (ch) {
+ case ']':
+ return;
+
+ case '<':
+ switch (ch = in.read()) {
+ case '!':
+ switch (ch = in.read()) {
+ case '[':
+ parseMarkedSectionDeclaration();
+ break;
+
+ case '-':
+ skipParameterSpace();
+ expect('>');
+ break;
+
+ default:
+ if (parseIdentifier(true)) {
+ String str = getString(0);
+
+ if (str.equals("element")) {
+ parseElementDeclaration();
+
+ } else if (str.equals("entity")) {
+ parseEntityDeclaration();
+
+ } else if (str.equals("attlist")) {
+ parseAttlistDeclaration();
+
+ } else if (str.equals("doctype")) {
+ parseDocumentTypeDeclaration();
+
+ } else if (str.equals("usemap")) {
+ error("ignoring", "usemap");
+ while ((ch != -1) && (ch != '>')) {
+ ch = in.read();
+ }
+ expect('>');
+ } else if (str.equals("shortref")) {
+ error("ignoring", "shortref");
+ while ((ch != -1) && (ch != '>')) {
+ ch = in.read();
+ }
+ expect('>');
+ } else if (str.equals("notation")) {
+ error("ignoring", "notation");
+ while ((ch != -1) && (ch != '>')) {
+ ch = in.read();
+ }
+ expect('>');
+ } else {
+ error("markup");
+ }
+ } else {
+ error("markup");
+ while ((ch != -1) && (ch != '>')) {
+ ch = in.read();
+ }
+ expect('>');
+ }
+ }
+ }
+ break;
+
+ case -1:
+ return;
+
+ default:
+ char str[] = {(char)ch};
+ error("invalid.arg", "character", "'" + new String(str) + "' / " + ch);
+ /* fall through */
+
+ case ' ':
+ case '\t':
+ case '\n':
+ ch = in.read();
+ break;
+ }
+ }
+ }
+
+ /**
+ * Parse a DTD.
+ * @return the dtd or null if an error occurred.
+ */
+ DTD parse(InputStream in, DTDBuilder dtd) {
+ try {
+ this.dtd = dtd;
+ this.in = new DTDInputStream(in, dtd);
+
+ ch = this.in.read();
+ parseSection();
+
+ if (ch != -1) {
+ error("premature");
+ }
+ } catch (IOException e) {
+ error("ioexception");
+ } catch (Exception e) {
+ error("exception", e.getClass().getName(), e.getMessage());
+ e.printStackTrace();
+ } catch (ThreadDeath e) {
+ error("terminated");
+ }
+ return (nerrors > 0) ? null : dtd;
+ }
+}