1 /*
   2  * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.dtdparser;
  27 
  28 import org.xml.sax.EntityResolver;
  29 import org.xml.sax.InputSource;
  30 import org.xml.sax.Locator;
  31 import org.xml.sax.SAXException;
  32 import org.xml.sax.SAXParseException;
  33 
  34 import java.io.IOException;
  35 import java.util.ArrayList;
  36 import java.util.Enumeration;
  37 import java.util.Hashtable;
  38 import java.util.Locale;
  39 import java.util.Set;
  40 import java.util.Vector;
  41 import java.util.logging.Level;
  42 import java.util.logging.Logger;
  43 
  44 /**
  45  * This implements parsing of XML 1.0 DTDs.
  46  * <p>
  47  * This conforms to the portion of the XML 1.0 specification related to the
  48  * external DTD subset.
  49  * <p>
  50  * For multi-language applications (such as web servers using XML processing to
  51  * create dynamic content), a method supports choosing a locale for parser
  52  * diagnostics which is both understood by the message recipient and supported
  53  * by the parser.
  54  * <p>
  55  * This parser produces a stream of parse events. It supports some features
  56  * (exposing comments, CDATA sections, and entity references) which are not
  57  * required to be reported by conformant XML processors.
  58  *
  59  * @author David Brownell
  60  * @author Janet Koenig
  61  * @author Kohsuke KAWAGUCHI
  62  * @version $Id: DTDParser.java,v 1.2 2009-04-16 15:25:49 snajper Exp $
  63  */
  64 public class DTDParser {
  65 
  66     public final static String TYPE_CDATA = "CDATA";
  67     public final static String TYPE_ID = "ID";
  68     public final static String TYPE_IDREF = "IDREF";
  69     public final static String TYPE_IDREFS = "IDREFS";
  70     public final static String TYPE_ENTITY = "ENTITY";
  71     public final static String TYPE_ENTITIES = "ENTITIES";
  72     public final static String TYPE_NMTOKEN = "NMTOKEN";
  73     public final static String TYPE_NMTOKENS = "NMTOKENS";
  74     public final static String TYPE_NOTATION = "NOTATION";
  75     public final static String TYPE_ENUMERATION = "ENUMERATION";
  76     // stack of input entities being merged
  77     private InputEntity in;
  78     // temporaries reused during parsing
  79     private StringBuffer strTmp;
  80     private char nameTmp[];
  81     private NameCache nameCache;
  82     private char charTmp[] = new char[2];
  83     // temporary DTD parsing state
  84     private boolean doLexicalPE;
  85     // DTD state, used during parsing
  86 //    private SimpleHashtable    elements = new SimpleHashtable (47);
  87     protected final Set declaredElements = new java.util.HashSet();
  88     private SimpleHashtable params = new SimpleHashtable(7);
  89     // exposed to package-private subclass
  90     Hashtable notations = new Hashtable(7);
  91     SimpleHashtable entities = new SimpleHashtable(17);
  92     private SimpleHashtable ids = new SimpleHashtable();
  93     // listeners for DTD parsing events
  94     private DTDEventListener dtdHandler;
  95     private EntityResolver resolver;
  96     private Locale locale;
  97     // string constants -- use these copies so "==" works
  98     // package private
  99     static final String strANY = "ANY";
 100     static final String strEMPTY = "EMPTY";
 101 
 102     private static final Logger LOGGER = Logger.getLogger(DTDParser.class.getName());
 103 
 104     /**
 105      * Used by applications to request locale for diagnostics.
 106      *
 107      * @param l The locale to use, or null to use system defaults (which may
 108      * include only message IDs).
 109      */
 110     public void setLocale(Locale l) throws SAXException {
 111 
 112         if (l != null && !messages.isLocaleSupported(l.toString())) {
 113             throw new SAXException(messages.getMessage(locale,
 114                     "P-078", new Object[]{l}));
 115         }
 116         locale = l;
 117     }
 118 
 119     /**
 120      * Returns the diagnostic locale.
 121      */
 122     public Locale getLocale() {
 123         return locale;
 124     }
 125 
 126     /**
 127      * Chooses a client locale to use for diagnostics, using the first language
 128      * specified in the list that is supported by this parser. That locale is
 129      * then set using <a href="#setLocale(java.util.Locale)"> setLocale()</a>.
 130      * Such a list could be provided by a variety of user preference mechanisms,
 131      * including the HTTP <em>Accept-Language</em> header field.
 132      *
 133      * @param languages Array of language specifiers, ordered with the most
 134      * preferable one at the front. For example, "en-ca" then "fr-ca", followed
 135      * by "zh_CN". Both RFC 1766 and Java styles are supported.
 136      * @return The chosen locale, or null.
 137      * @see MessageCatalog
 138      */
 139     public Locale chooseLocale(String languages[])
 140             throws SAXException {
 141 
 142         Locale l = messages.chooseLocale(languages);
 143 
 144         if (l != null) {
 145             setLocale(l);
 146         }
 147         return l;
 148     }
 149 
 150     /**
 151      * Lets applications control entity resolution.
 152      */
 153     public void setEntityResolver(EntityResolver r) {
 154 
 155         resolver = r;
 156     }
 157 
 158     /**
 159      * Returns the object used to resolve entities
 160      */
 161     public EntityResolver getEntityResolver() {
 162 
 163         return resolver;
 164     }
 165 
 166     /**
 167      * Used by applications to set handling of DTD parsing events.
 168      */
 169     public void setDtdHandler(DTDEventListener handler) {
 170         dtdHandler = handler;
 171         if (handler != null) {
 172             handler.setDocumentLocator(new Locator() {
 173                 @Override
 174                 public String getPublicId() {
 175                     return DTDParser.this.getPublicId();
 176                 }
 177 
 178                 @Override
 179                 public String getSystemId() {
 180                     return DTDParser.this.getSystemId();
 181                 }
 182 
 183                 @Override
 184                 public int getLineNumber() {
 185                     return DTDParser.this.getLineNumber();
 186                 }
 187 
 188                 @Override
 189                 public int getColumnNumber() {
 190                     return DTDParser.this.getColumnNumber();
 191                 }
 192             });
 193         }
 194     }
 195 
 196     /**
 197      * Returns the handler used to for DTD parsing events.
 198      */
 199     public DTDEventListener getDtdHandler() {
 200         return dtdHandler;
 201     }
 202 
 203     /**
 204      * Parse a DTD.
 205      */
 206     public void parse(InputSource in)
 207             throws IOException, SAXException {
 208         init();
 209         parseInternal(in);
 210     }
 211 
 212     /**
 213      * Parse a DTD.
 214      */
 215     public void parse(String uri)
 216             throws IOException, SAXException {
 217         InputSource inSource;
 218 
 219         init();
 220         // System.out.println ("parse (\"" + uri + "\")");
 221         inSource = resolver.resolveEntity(null, uri);
 222 
 223         // If custom resolver punts resolution to parser, handle it ...
 224         if (inSource == null) {
 225             inSource = Resolver.createInputSource(new java.net.URL(uri), false);
 226 
 227             // ... or if custom resolver doesn't correctly construct the
 228             // input entity, patch it up enough so relative URIs work, and
 229             // issue a warning to minimize later confusion.
 230         } else if (inSource.getSystemId() == null) {
 231             warning("P-065", null);
 232             inSource.setSystemId(uri);
 233         }
 234 
 235         parseInternal(inSource);
 236     }
 237 
 238     // makes sure the parser is reset to "before a document"
 239     private void init() {
 240         in = null;
 241 
 242         // alloc temporary data used in parsing
 243         strTmp = new StringBuffer();
 244         nameTmp = new char[20];
 245         nameCache = new NameCache();
 246 
 247         // reset doc info
 248 //        isInAttribute = false;
 249 
 250         doLexicalPE = false;
 251 
 252         entities.clear();
 253         notations.clear();
 254         params.clear();
 255         //    elements.clear ();
 256         declaredElements.clear();
 257 
 258         // initialize predefined references ... re-interpreted later
 259         builtin("amp", "&");
 260         builtin("lt", "<");
 261         builtin("gt", ">");
 262         builtin("quot", "\"");
 263         builtin("apos", "'");
 264 
 265         if (locale == null) {
 266             locale = Locale.getDefault();
 267         }
 268         if (resolver == null) {
 269             resolver = new Resolver();
 270         }
 271         if (dtdHandler == null) {
 272             dtdHandler = new DTDHandlerBase();
 273         }
 274     }
 275 
 276     private void builtin(String entityName, String entityValue) {
 277         InternalEntity entity;
 278         entity = new InternalEntity(entityName, entityValue.toCharArray());
 279         entities.put(entityName, entity);
 280     }
 281 
 282     ////////////////////////////////////////////////////////////////
 283     //
 284     // parsing is by recursive descent, code roughly
 285     // following the BNF rules except tweaked for simple
 286     // lookahead.  rules are more or less in numeric order,
 287     // except where code sharing suggests other structures.
 288     //
 289     // a classic benefit of recursive descent parsers:  it's
 290     // relatively easy to get diagnostics that make sense.
 291     //
 292     ////////////////////////////////////////////////////////////////
 293     @SuppressWarnings("CallToThreadDumpStack")
 294     private void parseInternal(InputSource input)
 295             throws IOException, SAXException {
 296 
 297         if (input == null) {
 298             fatal("P-000");
 299         }
 300 
 301         try {
 302             in = InputEntity.getInputEntity(dtdHandler, locale);
 303             in.init(input, null, null, false);
 304 
 305             dtdHandler.startDTD(in);
 306 
 307             // [30] extSubset ::= TextDecl? extSubsetDecl
 308             // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
 309             //        | PEReference | S )*
 310             //    ... same as [79] extPE, which is where the code is
 311 
 312             ExternalEntity externalSubset = new ExternalEntity(in);
 313             externalParameterEntity(externalSubset);
 314 
 315             if (!in.isEOF()) {
 316                 fatal("P-001", new Object[]{Integer.toHexString(((int) getc()))});
 317             }
 318             afterRoot();
 319             dtdHandler.endDTD();
 320 
 321         } catch (EndOfInputException e) {
 322             if (!in.isDocument()) {
 323                 String name = in.getName();
 324                 do {    // force a relevant URI and line number
 325                     in = in.pop();
 326                 } while (in.isInternal());
 327                 fatal("P-002", new Object[]{name});
 328             } else {
 329                 fatal("P-003", null);
 330             }
 331         } catch (RuntimeException e) {
 332             LOGGER.log(Level.SEVERE, "Internal DTD parser error.", e);
 333             throw new SAXParseException(e.getMessage() != null
 334                     ? e.getMessage() : e.getClass().getName(),
 335                     getPublicId(), getSystemId(),
 336                     getLineNumber(), getColumnNumber());
 337 
 338         } finally {
 339             // recycle temporary data used during parsing
 340             strTmp = null;
 341             nameTmp = null;
 342             nameCache = null;
 343 
 344             // ditto input sources etc
 345             if (in != null) {
 346                 in.close();
 347                 in = null;
 348             }
 349 
 350             // get rid of all DTD info ... some of it would be
 351             // useful for editors etc, investigate later.
 352 
 353             params.clear();
 354             entities.clear();
 355             notations.clear();
 356             declaredElements.clear();
 357 //        elements.clear();
 358             ids.clear();
 359         }
 360     }
 361 
 362     void afterRoot() throws SAXException {
 363         // Make sure all IDREFs match declared ID attributes.  We scan
 364         // after the document element is parsed, since XML allows forward
 365         // references, and only now can we know if they're all resolved.
 366 
 367         for (Enumeration e = ids.keys();
 368                 e.hasMoreElements();) {
 369             String id = (String) e.nextElement();
 370             Boolean value = (Boolean) ids.get(id);
 371             if (Boolean.FALSE.equals(value)) {
 372                 error("V-024", new Object[]{id});
 373             }
 374         }
 375     }
 376 
 377     // role is for diagnostics
 378     private void whitespace(String roleId)
 379             throws IOException, SAXException {
 380 
 381         // [3] S ::= (#x20 | #x9 | #xd | #xa)+
 382         if (!maybeWhitespace()) {
 383             fatal("P-004", new Object[]{messages.getMessage(locale, roleId)});
 384         }
 385     }
 386 
 387     // S?
 388     private boolean maybeWhitespace()
 389             throws IOException, SAXException {
 390 
 391         if (!doLexicalPE) {
 392             return in.maybeWhitespace();
 393         }
 394 
 395         // see getc() for the PE logic -- this lets us splice
 396         // expansions of PEs in "anywhere".  getc() has smarts,
 397         // so for external PEs we don't bypass it.
 398 
 399         // XXX we can marginally speed PE handling, and certainly
 400         // be cleaner (hence potentially more correct), by using
 401         // the observations that expanded PEs only start and stop
 402         // where whitespace is allowed.  getc wouldn't need any
 403         // "lexical" PE expansion logic, and no other method needs
 404         // to handle termination of PEs.  (parsing of literals would
 405         // still need to pop entities, but not parsing of references
 406         // in content.)
 407 
 408         char c = getc();
 409         boolean saw = false;
 410 
 411         while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
 412             saw = true;
 413 
 414             // this gracefully ends things when we stop playing
 415             // with internal parameters.  caller should have a
 416             // grammar rule allowing whitespace at end of entity.
 417             if (in.isEOF() && !in.isInternal()) {
 418                 return saw;
 419             }
 420             c = getc();
 421         }
 422         ungetc();
 423         return saw;
 424     }
 425 
 426     private String maybeGetName()
 427             throws IOException, SAXException {
 428 
 429         NameCacheEntry entry = maybeGetNameCacheEntry();
 430         return (entry == null) ? null : entry.name;
 431     }
 432 
 433     private NameCacheEntry maybeGetNameCacheEntry()
 434             throws IOException, SAXException {
 435 
 436         // [5] Name ::= (Letter|'_'|':') (Namechar)*
 437         char c = getc();
 438 
 439         if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
 440             ungetc();
 441             return null;
 442         }
 443         return nameCharString(c);
 444     }
 445 
 446     // Used when parsing enumerations
 447     private String getNmtoken()
 448             throws IOException, SAXException {
 449 
 450         // [7] Nmtoken ::= (Namechar)+
 451         char c = getc();
 452         if (!XmlChars.isNameChar(c)) {
 453             fatal("P-006", new Object[]{Character.valueOf(c)});
 454         }
 455         return nameCharString(c).name;
 456     }
 457 
 458     // n.b. this gets used when parsing attribute values (for
 459     // internal references) so we can't use strTmp; it's also
 460     // a hotspot for CPU and memory in the parser (called at least
 461     // once for each element) so this has been optimized a bit.
 462     private NameCacheEntry nameCharString(char c)
 463             throws IOException, SAXException {
 464 
 465         int i = 1;
 466 
 467         nameTmp[0] = c;
 468         for (;;) {
 469             if ((c = in.getNameChar()) == 0) {
 470                 break;
 471             }
 472             if (i >= nameTmp.length) {
 473                 char tmp[] = new char[nameTmp.length + 10];
 474                 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
 475                 nameTmp = tmp;
 476             }
 477             nameTmp[i++] = c;
 478         }
 479         return nameCache.lookupEntry(nameTmp, i);
 480     }
 481 
 482     //
 483     // much similarity between parsing entity values in DTD
 484     // and attribute values (in DTD or content) ... both follow
 485     // literal parsing rules, newline canonicalization, etc
 486     //
 487     // leaves value in 'strTmp' ... either a "replacement text" (4.5),
 488     // or else partially normalized attribute value (the first bit
 489     // of 3.3.3's spec, without the "if not CDATA" bits).
 490     //
 491     @SuppressWarnings("UnusedAssignment")
 492     private void parseLiteral(boolean isEntityValue)
 493             throws IOException, SAXException {
 494 
 495         // [9] EntityValue ::=
 496         //    '"' ([^"&%] | Reference | PEReference)* '"'
 497         //    |    "'" ([^'&%] | Reference | PEReference)* "'"
 498         // [10] AttValue ::=
 499         //    '"' ([^"&]  | Reference             )* '"'
 500         //    |    "'" ([^'&]  | Reference             )* "'"
 501         char quote = getc();
 502         char c;
 503         InputEntity source = in;
 504 
 505         if (quote != '\'' && quote != '"') {
 506             fatal("P-007");
 507         }
 508 
 509         // don't report entity expansions within attributes,
 510         // they're reported "fully expanded" via SAX
 511 //    isInAttribute = !isEntityValue;
 512 
 513         // get value into strTmp
 514         strTmp = new StringBuffer();
 515 
 516         // scan, allowing entity push/pop wherever ...
 517         // expanded entities can't terminate the literal!
 518         for (;;) {
 519             if (in != source && in.isEOF()) {
 520                 // we don't report end of parsed entities
 521                 // within attributes (no SAX hooks)
 522                 in = in.pop();
 523                 continue;
 524             }
 525             if ((c = getc()) == quote && in == source) {
 526                 break;
 527             }
 528 
 529             //
 530             // Basically the "reference in attribute value"
 531             // row of the chart in section 4.4 of the spec
 532             //
 533             if (c == '&') {
 534                 String entityName = maybeGetName();
 535 
 536                 if (entityName != null) {
 537                     nextChar(';', "F-020", entityName);
 538 
 539                     // 4.4 says:  bypass these here ... we'll catch
 540                     // forbidden refs to unparsed entities on use
 541                     if (isEntityValue) {
 542                         strTmp.append('&');
 543                         strTmp.append(entityName);
 544                         strTmp.append(';');
 545                         continue;
 546                     }
 547                     expandEntityInLiteral(entityName, entities, isEntityValue);
 548 
 549                     // character references are always included immediately
 550                 } else if ((getc()) == '#') {
 551                     int tmp = parseCharNumber();
 552 
 553                     if (tmp > 0xffff) {
 554                         tmp = surrogatesToCharTmp(tmp);
 555                         strTmp.append(charTmp[0]);
 556                         if (tmp == 2) {
 557                             strTmp.append(charTmp[1]);
 558                         }
 559                     } else {
 560                         strTmp.append((char) tmp);
 561                     }
 562                 } else {
 563                     fatal("P-009");
 564                 }
 565                 continue;
 566 
 567             }
 568 
 569             // expand parameter entities only within entity value literals
 570             if (c == '%' && isEntityValue) {
 571                 String entityName = maybeGetName();
 572 
 573                 if (entityName != null) {
 574                     nextChar(';', "F-021", entityName);
 575                     expandEntityInLiteral(entityName, params, isEntityValue);
 576                     continue;
 577                 } else {
 578                     fatal("P-011");
 579                 }
 580             }
 581 
 582             // For attribute values ...
 583             if (!isEntityValue) {
 584                 // 3.3.3 says whitespace normalizes to space...
 585                 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
 586                     strTmp.append(' ');
 587                     continue;
 588                 }
 589 
 590                 // "<" not legal in parsed literals ...
 591                 if (c == '<') {
 592                     fatal("P-012");
 593                 }
 594             }
 595 
 596             strTmp.append(c);
 597         }
 598 //    isInAttribute = false;
 599     }
 600 
 601     // does a SINGLE expansion of the entity (often reparsed later)
 602     private void expandEntityInLiteral(String name, SimpleHashtable table,
 603             boolean isEntityValue)
 604             throws IOException, SAXException {
 605 
 606         Object entity = table.get(name);
 607 
 608         if (entity instanceof InternalEntity) {
 609             InternalEntity value = (InternalEntity) entity;
 610             pushReader(value.buf, name, !value.isPE);
 611 
 612         } else if (entity instanceof ExternalEntity) {
 613             if (!isEntityValue) // must be a PE ...
 614             {
 615                 fatal("P-013", new Object[]{name});
 616             }
 617             // XXX if this returns false ...
 618             pushReader((ExternalEntity) entity);
 619 
 620         } else if (entity == null) {
 621             //
 622             // Note:  much confusion about whether spec requires such
 623             // errors to be fatal in many cases, but none about whether
 624             // it allows "normal" errors to be unrecoverable!
 625             //
 626             fatal((table == params) ? "V-022" : "P-014",
 627                     new Object[]{name});
 628         }
 629     }
 630 
 631     // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
 632     // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
 633     // NOTE:  XML spec should explicitly say that PE ref syntax is
 634     // ignored in PIs, comments, SystemLiterals, and Pubid Literal
 635     // values ... can't process the XML spec's own DTD without doing
 636     // that for comments.
 637     private String getQuotedString(String type, String extra)
 638             throws IOException, SAXException {
 639 
 640         // use in.getc to bypass PE processing
 641         char quote = in.getc();
 642 
 643         if (quote != '\'' && quote != '"') {
 644             fatal("P-015", new Object[]{
 645                         messages.getMessage(locale, type, new Object[]{extra})
 646                     });
 647         }
 648 
 649         char c;
 650 
 651         strTmp = new StringBuffer();
 652         while ((c = in.getc()) != quote) {
 653             strTmp.append((char) c);
 654         }
 655         return strTmp.toString();
 656     }
 657 
 658     private String parsePublicId() throws IOException, SAXException {
 659 
 660         // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
 661         // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
 662         String retval = getQuotedString("F-033", null);
 663         for (int i = 0; i < retval.length(); i++) {
 664             char c = retval.charAt(i);
 665             if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
 666                     && !(c >= 'A' && c <= 'Z')
 667                     && !(c >= 'a' && c <= 'z')) {
 668                 fatal("P-016", new Object[]{Character.valueOf(c)});
 669             }
 670         }
 671         strTmp = new StringBuffer();
 672         strTmp.append(retval);
 673         return normalize(false);
 674     }
 675 
 676     // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 677     // handled by:  InputEntity.parsedContent()
 678     private boolean maybeComment(boolean skipStart)
 679             throws IOException, SAXException {
 680 
 681         // [15] Comment ::= '<!--'
 682         //        ( (Char - '-') | ('-' (Char - '-'))*
 683         //        '-->'
 684         if (!in.peek(skipStart ? "!--" : "<!--", null)) {
 685             return false;
 686         }
 687 
 688         boolean savedLexicalPE = doLexicalPE;
 689         boolean saveCommentText;
 690 
 691         doLexicalPE = false;
 692         saveCommentText = false;
 693         if (saveCommentText) {
 694             strTmp = new StringBuffer();
 695         }
 696 
 697         oneComment:
 698         for (;;) {
 699             try {
 700                 // bypass PE expansion, but permit PEs
 701                 // to complete ... valid docs won't care.
 702                 for (;;) {
 703                     int c = getc();
 704                     if (c == '-') {
 705                         c = getc();
 706                         if (c != '-') {
 707                             if (saveCommentText) {
 708                                 strTmp.append('-');
 709                             }
 710                             ungetc();
 711                             continue;
 712                         }
 713                         nextChar('>', "F-022", null);
 714                         break oneComment;
 715                     }
 716                     if (saveCommentText) {
 717                         strTmp.append((char) c);
 718                     }
 719                 }
 720             } catch (EndOfInputException e) {
 721                 //
 722                 // This is fatal EXCEPT when we're processing a PE...
 723                 // in which case a validating processor reports an error.
 724                 // External PEs are easy to detect; internal ones we
 725                 // infer by being an internal entity outside an element.
 726                 //
 727                 if (in.isInternal()) {
 728                     error("V-021", null);
 729                 }
 730                 fatal("P-017");
 731             }
 732         }
 733         doLexicalPE = savedLexicalPE;
 734         if (saveCommentText) {
 735             dtdHandler.comment(strTmp.toString());
 736         }
 737         return true;
 738     }
 739 
 740     private boolean maybePI(boolean skipStart)
 741             throws IOException, SAXException {
 742 
 743         // [16] PI ::= '<?' PITarget
 744         //        (S (Char* - (Char* '?>' Char*)))?
 745         //        '?>'
 746         // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
 747         boolean savedLexicalPE = doLexicalPE;
 748 
 749         if (!in.peek(skipStart ? "?" : "<?", null)) {
 750             return false;
 751         }
 752         doLexicalPE = false;
 753 
 754         String target = maybeGetName();
 755 
 756         if (target == null) {
 757             fatal("P-018");
 758         }
 759         if ("xml".equals(target)) {
 760             fatal("P-019");
 761         }
 762         if ("xml".equalsIgnoreCase(target)) {
 763             fatal("P-020", new Object[]{target});
 764         }
 765 
 766         if (maybeWhitespace()) {
 767             strTmp = new StringBuffer();
 768             try {
 769                 for (;;) {
 770                     // use in.getc to bypass PE processing
 771                     char c = in.getc();
 772                     //Reached the end of PI.
 773                     if (c == '?' && in.peekc('>')) {
 774                         break;
 775                     }
 776                     strTmp.append(c);
 777                 }
 778             } catch (EndOfInputException e) {
 779                 fatal("P-021");
 780             }
 781             dtdHandler.processingInstruction(target, strTmp.toString());
 782         } else {
 783             if (!in.peek("?>", null)) {
 784                 fatal("P-022");
 785             }
 786             dtdHandler.processingInstruction(target, "");
 787         }
 788 
 789         doLexicalPE = savedLexicalPE;
 790         return true;
 791     }
 792 
 793     // [18] CDSect ::= CDStart CData CDEnd
 794     // [19] CDStart ::= '<![CDATA['
 795     // [20] CData ::= (Char* - (Char* ']]>' Char*))
 796     // [21] CDEnd ::= ']]>'
 797     //
 798     //    ... handled by InputEntity.unparsedContent()
 799     // collapsing several rules together ...
 800     // simpler than attribute literals -- no reference parsing!
 801     private String maybeReadAttribute(String name, boolean must)
 802             throws IOException, SAXException {
 803 
 804         // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
 805         // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
 806         // [32] SDDecl ::=  S 'standalone' Eq \'|\" ... \'|\"
 807         if (!maybeWhitespace()) {
 808             if (!must) {
 809                 return null;
 810             }
 811             fatal("P-024", new Object[]{name});
 812             // NOTREACHED
 813         }
 814 
 815         if (!peek(name)) {
 816             if (must) {
 817                 fatal("P-024", new Object[]{name});
 818             } else {
 819                 // To ensure that the whitespace is there so that when we
 820                 // check for the next attribute we assure that the
 821                 // whitespace still exists.
 822                 ungetc();
 823                 return null;
 824             }
 825         }
 826 
 827         // [25] Eq ::= S? '=' S?
 828         maybeWhitespace();
 829         nextChar('=', "F-023", null);
 830         maybeWhitespace();
 831 
 832         return getQuotedString("F-035", name);
 833     }
 834 
 835     private void readVersion(boolean must, String versionNum)
 836             throws IOException, SAXException {
 837 
 838         String value = maybeReadAttribute("version", must);
 839 
 840         // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
 841 
 842         if (must && value == null) {
 843             fatal("P-025", new Object[]{versionNum});
 844         }
 845         if (value != null) {
 846             int length = value.length();
 847             for (int i = 0; i < length; i++) {
 848                 char c = value.charAt(i);
 849                 if (!((c >= '0' && c <= '9')
 850                         || c == '_' || c == '.'
 851                         || (c >= 'a' && c <= 'z')
 852                         || (c >= 'A' && c <= 'Z')
 853                         || c == ':' || c == '-')) {
 854                     fatal("P-026", new Object[]{value});
 855                 }
 856             }
 857         }
 858         if (value != null && !value.equals(versionNum)) {
 859             error("P-027", new Object[]{versionNum, value});
 860         }
 861     }
 862 
 863     // common code used by most markup declarations
 864     // ... S (Q)Name ...
 865     private String getMarkupDeclname(String roleId, boolean qname)
 866             throws IOException, SAXException {
 867 
 868         String name;
 869 
 870         whitespace(roleId);
 871         name = maybeGetName();
 872         if (name == null) {
 873             fatal("P-005", new Object[]{messages.getMessage(locale, roleId)});
 874         }
 875         return name;
 876     }
 877 
 878     private boolean maybeMarkupDecl()
 879             throws IOException, SAXException {
 880 
 881         // [29] markupdecl ::= elementdecl | Attlistdecl
 882         //           | EntityDecl | NotationDecl | PI | Comment
 883         return maybeElementDecl()
 884                 || maybeAttlistDecl()
 885                 || maybeEntityDecl()
 886                 || maybeNotationDecl()
 887                 || maybePI(false)
 888                 || maybeComment(false);
 889     }
 890     private static final String XmlLang = "xml:lang";
 891 
 892     private boolean isXmlLang(String value) {
 893 
 894         // [33] LanguageId ::= Langcode ('-' Subcode)*
 895         // [34] Langcode ::= ISO639Code | IanaCode | UserCode
 896         // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
 897         // [36] IanaCode ::= [iI] '-' SubCode
 898         // [37] UserCode ::= [xX] '-' SubCode
 899         // [38] SubCode ::= [a-zA-Z]+
 900 
 901         // the ISO and IANA codes (and subcodes) are registered,
 902         // but that's neither a WF nor a validity constraint.
 903 
 904         int nextSuffix;
 905         char c;
 906 
 907         if (value.length() < 2) {
 908             return false;
 909         }
 910         c = value.charAt(1);
 911         if (c == '-') {        // IANA, or user, code
 912             c = value.charAt(0);
 913             if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X')) {
 914                 return false;
 915             }
 916             nextSuffix = 1;
 917         } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
 918             // 2 letter ISO code, or error
 919             c = value.charAt(0);
 920             if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
 921                 return false;
 922             }
 923             nextSuffix = 2;
 924         } else {
 925             return false;
 926         }
 927 
 928         // here "suffix" ::= '-' [a-zA-Z]+ suffix*
 929         while (nextSuffix < value.length()) {
 930             c = value.charAt(nextSuffix);
 931             if (c != '-') {
 932                 break;
 933             }
 934             while (++nextSuffix < value.length()) {
 935                 c = value.charAt(nextSuffix);
 936                 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
 937                     break;
 938                 }
 939             }
 940         }
 941         return value.length() == nextSuffix && c != '-';
 942     }
 943 
 944     //
 945     // CHAPTER 3:  Logical Structures
 946     //
 947     /**
 948      * To validate, subclassers should at this time make sure that values are of
 949      * the declared types:<UL> <LI> ID and IDREF(S) values are Names <LI>
 950      * NMTOKEN(S) are Nmtokens <LI> ENUMERATION values match one of the tokens
     * <LI> NOTATION values match a notation name <LI> ENTITY(IES) values match
 952      * an unparsed external entity </UL>
 953      * <p>
 954      * <P> Separately, make sure IDREF values match some ID provided in the
 955      * document (in the afterRoot method).
 956      */
 957     /*    void validateAttributeSyntax (Attribute attr, String value)
 958      throws DTDParseException {
 959      // ID, IDREF(S) ... values are Names
 960      if (Attribute.ID == attr.type()) {
 961      if (!XmlNames.isName (value))
 962      error ("V-025", new Object [] { value });
 963 
 964      Boolean             b = (Boolean) ids.getNonInterned (value);
 965      if (b == null || b.equals (Boolean.FALSE))
 966      ids.put (value.intern (), Boolean.TRUE);
 967      else
 968      error ("V-026", new Object [] { value });
 969 
 970      } else if (Attribute.IDREF == attr.type()) {
 971      if (!XmlNames.isName (value))
 972      error ("V-027", new Object [] { value });
 973 
 974      Boolean             b = (Boolean) ids.getNonInterned (value);
 975      if (b == null)
 976      ids.put (value.intern (), Boolean.FALSE);
 977 
 978      } else if (Attribute.IDREFS == attr.type()) {
 979      StringTokenizer     tokenizer = new StringTokenizer (value);
 980      Boolean             b;
 981      boolean             sawValue = false;
 982 
 983      while (tokenizer.hasMoreTokens ()) {
 984      value = tokenizer.nextToken ();
 985      if (!XmlNames.isName (value))
 986      error ("V-027", new Object [] { value });
 987      b = (Boolean) ids.getNonInterned (value);
 988      if (b == null)
 989      ids.put (value.intern (), Boolean.FALSE);
 990      sawValue = true;
 991      }
 992      if (!sawValue)
 993      error ("V-039", null);
 994 
 995 
 996      // NMTOKEN(S) ... values are Nmtoken(s)
 997      } else if (Attribute.NMTOKEN == attr.type()) {
 998      if (!XmlNames.isNmtoken (value))
 999      error ("V-028", new Object [] { value });
1000 
1001      } else if (Attribute.NMTOKENS == attr.type()) {
1002      StringTokenizer     tokenizer = new StringTokenizer (value);
1003      boolean             sawValue = false;
1004 
1005      while (tokenizer.hasMoreTokens ()) {
1006      value = tokenizer.nextToken ();
1007      if (!XmlNames.isNmtoken (value))
1008      error ("V-028", new Object [] { value });
1009      sawValue = true;
1010      }
1011      if (!sawValue)
1012      error ("V-032", null);
1013 
1014      // ENUMERATION ... values match one of the tokens
1015      } else if (Attribute.ENUMERATION == attr.type()) {
1016      for (int i = 0; i < attr.values().length; i++)
1017      if (value.equals (attr.values()[i]))
1018      return;
1019      error ("V-029", new Object [] { value });
1020 
1021      // NOTATION values match a notation name
1022      } else if (Attribute.NOTATION == attr.type()) {
1023      //
1024      // XXX XML 1.0 spec should probably list references to
1025      // externally defined notations in standalone docs as
1026      // validity errors.  Ditto externally defined unparsed
1027      // entities; neither should show up in attributes, else
1028      // one needs to read the external declarations in order
1029      // to make sense of the document (exactly what tagging
1030      // a doc as "standalone" intends you won't need to do).
1031      //
1032      for (int i = 0; i < attr.values().length; i++)
1033      if (value.equals (attr.values()[i]))
1034      return;
1035      error ("V-030", new Object [] { value });
1036 
1037      // ENTITY(IES) values match an unparsed entity(ies)
1038      } else if (Attribute.ENTITY == attr.type()) {
1039      // see note above re standalone
1040      if (!isUnparsedEntity (value))
1041      error ("V-031", new Object [] { value });
1042 
1043      } else if (Attribute.ENTITIES == attr.type()) {
1044      StringTokenizer     tokenizer = new StringTokenizer (value);
1045      boolean             sawValue = false;
1046 
1047      while (tokenizer.hasMoreTokens ()) {
1048      value = tokenizer.nextToken ();
1049      // see note above re standalone
1050      if (!isUnparsedEntity (value))
1051      error ("V-031", new Object [] { value });
1052      sawValue = true;
1053      }
1054      if (!sawValue)
1055      error ("V-040", null);
1056 
1057      } else if (Attribute.CDATA != attr.type())
1058      throw new InternalError (attr.type());
1059      }
1060      */
1061     /*
1062      private boolean isUnparsedEntity (String name)
1063      {
1064      Object e = entities.getNonInterned (name);
1065      if (e == null || !(e instanceof ExternalEntity))
1066      return false;
1067      return ((ExternalEntity)e).notation != null;
1068      }
1069      */
1070     private boolean maybeElementDecl()
1071             throws IOException, SAXException {
1072 
1073         // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1074         // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1075         InputEntity start = peekDeclaration("!ELEMENT");
1076 
1077         if (start == null) {
1078             return false;
1079         }
1080 
1081         // n.b. for content models where inter-element whitespace is
1082         // ignorable, we mark that fact here.
1083         String name = getMarkupDeclname("F-015", true);
1084 //    Element        element = (Element) elements.get (name);
1085 //    boolean        declEffective = false;
1086 
1087         /*
1088          if (element != null) {
1089          if (element.contentModel() != null) {
1090          error ("V-012", new Object [] { name });
1091          } // else <!ATTLIST name ...> came first
1092          } else {
1093          element = new Element(name);
1094          elements.put (element.name(), element);
1095          declEffective = true;
1096          }
1097          */
1098         if (declaredElements.contains(name)) {
1099             error("V-012", new Object[]{name});
1100         } else {
1101             declaredElements.add(name);
1102 //        declEffective = true;
1103         }
1104 
1105         short modelType;
1106         whitespace("F-000");
1107         if (peek(strEMPTY)) {
1108 ///        // leave element.contentModel as null for this case.
1109             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
1110         } else if (peek(strANY)) {
1111 ///        element.setContentModel(new StringModel(StringModelType.ANY));
1112             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
1113         } else {
1114             modelType = getMixedOrChildren(name);
1115         }
1116 
1117         dtdHandler.endContentModel(name, modelType);
1118 
1119         maybeWhitespace();
1120         char c = getc();
1121         if (c != '>') {
1122             fatal("P-036", new Object[]{name, Character.valueOf(c)});
1123         }
1124         if (start != in) {
1125             error("V-013", null);
1126         }
1127 
1128 ///        dtdHandler.elementDecl(element);
1129 
1130         return true;
1131     }
1132 
1133     // We're leaving the content model as a regular expression;
1134     // it's an efficient natural way to express such things, and
1135     // libraries often interpret them.  No whitespace in the
1136     // model we store, though!
1137     /**
1138      * returns content model type.
1139      */
1140     private short getMixedOrChildren(String elementName/*Element element*/)
1141             throws IOException, SAXException {
1142 
1143         InputEntity start;
1144 
1145         // [47] children ::= (choice|seq) ('?'|'*'|'+')?
1146         strTmp = new StringBuffer();
1147 
1148         nextChar('(', "F-028", elementName);
1149         start = in;
1150         maybeWhitespace();
1151         strTmp.append('(');
1152 
1153         short modelType;
1154         if (peek("#PCDATA")) {
1155             strTmp.append("#PCDATA");
1156             dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);
1157             getMixed(elementName, start);
1158         } else {
1159             dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
1160             getcps(elementName, start);
1161         }
1162 
1163         return modelType;
1164     }
1165 
1166     // '(' S? already consumed
1167     // matching ')' must be in "start" entity if validating
1168     private void getcps(/*Element element,*/String elementName, InputEntity start)
1169             throws IOException, SAXException {
1170 
1171         // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
1172         // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
1173         // [50] seq    ::= '(' S? cp (S? ',' S? cp)* S? ')'
1174         boolean decided = false;
1175         char type = 0;
1176 //        ContentModel       retval, temp, current;
1177 
1178 //        retval = temp = current = null;
1179 
1180         dtdHandler.startModelGroup();
1181 
1182         do {
1183             String tag;
1184 
1185             tag = maybeGetName();
1186             if (tag != null) {
1187                 strTmp.append(tag);
1188 //                temp = new ElementModel(tag);
1189 //                getFrequency((RepeatableContent)temp);
1190 ///->
1191                 dtdHandler.childElement(tag, getFrequency());
1192 ///<-
1193             } else if (peek("(")) {
1194                 InputEntity next = in;
1195                 strTmp.append('(');
1196                 maybeWhitespace();
1197 //                temp = getcps(element, next);
1198 //                getFrequency(temp);
1199 ///->
1200                 getcps(elementName, next);
1201 ///                getFrequency();        <- this looks like a bug
1202 ///<-
1203             } else {
1204                 fatal((type == 0) ? "P-039"
1205                         : ((type == ',') ? "P-037" : "P-038"),
1206                         new Object[]{Character.valueOf(getc())});
1207             }
1208 
1209             maybeWhitespace();
1210             if (decided) {
1211                 char c = getc();
1212 
1213 //                if (current != null) {
1214 //                    current.addChild(temp);
1215 //                }
1216                 if (c == type) {
1217                     strTmp.append(type);
1218                     maybeWhitespace();
1219                     reportConnector(type);
1220                     continue;
1221                 } else if (c == '\u0029') {    // rparen
1222                     ungetc();
1223                     continue;
1224                 } else {
1225                     fatal((type == 0) ? "P-041" : "P-040",
1226                             new Object[]{
1227                                 Character.valueOf(c),
1228                                 Character.valueOf(type)
1229                             });
1230                 }
1231             } else {
1232                 type = getc();
1233                 switch (type) {
1234                     case '|':
1235                     case ',':
1236                         reportConnector(type);
1237                         break;
1238                     default:
1239 //                        retval = temp;
1240                         ungetc();
1241                         continue;
1242                 }
1243 //                retval = (ContentModel)current;
1244                 decided = true;
1245 //                current.addChild(temp);
1246                 strTmp.append(type);
1247             }
1248             maybeWhitespace();
1249         } while (!peek(")"));
1250 
1251         if (in != start) {
1252             error("V-014", new Object[]{elementName});
1253         }
1254         strTmp.append(')');
1255 
1256         dtdHandler.endModelGroup(getFrequency());
1257 //        return retval;
1258     }
1259 
1260     private void reportConnector(char type) throws SAXException {
1261         switch (type) {
1262             case '|':
1263                 dtdHandler.connector(DTDEventListener.CHOICE);    ///<-
1264                 return;
1265             case ',':
1266                 dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
1267                 return;
1268             default:
1269                 throw new Error();    //assertion failed.
1270         }
1271     }
1272 
1273     private short getFrequency()
1274             throws IOException, SAXException {
1275 
1276         final char c = getc();
1277 
1278         if (c == '?') {
1279             strTmp.append(c);
1280             return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
1281             //        original.setRepeat(Repeat.ZERO_OR_ONE);
1282         } else if (c == '+') {
1283             strTmp.append(c);
1284             return DTDEventListener.OCCURENCE_ONE_OR_MORE;
1285             //        original.setRepeat(Repeat.ONE_OR_MORE);
1286         } else if (c == '*') {
1287             strTmp.append(c);
1288             return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
1289             //        original.setRepeat(Repeat.ZERO_OR_MORE);
1290         } else {
1291             ungetc();
1292             return DTDEventListener.OCCURENCE_ONCE;
1293         }
1294     }
1295 
1296     // '(' S? '#PCDATA' already consumed
1297     // matching ')' must be in "start" entity if validating
1298     private void getMixed(String elementName, /*Element element,*/ InputEntity start)
1299             throws IOException, SAXException {
1300 
1301         // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
1302         //        | '(' S? '#PCDATA'                   S? ')'
1303         maybeWhitespace();
1304         if (peek("\u0029*") || peek("\u0029")) {
1305             if (in != start) {
1306                 error("V-014", new Object[]{elementName});
1307             }
1308             strTmp.append(')');
1309 //            element.setContentModel(new StringModel(StringModelType.PCDATA));
1310             return;
1311         }
1312 
1313         ArrayList l = new ArrayList();
1314 //    l.add(new StringModel(StringModelType.PCDATA));
1315 
1316 
1317         while (peek("|")) {
1318             String name;
1319 
1320             strTmp.append('|');
1321             maybeWhitespace();
1322 
1323             doLexicalPE = true;
1324             name = maybeGetName();
1325             if (name == null) {
1326                 fatal("P-042", new Object[]{elementName, Integer.toHexString(getc())});
1327             }
1328             if (l.contains(name)) {
1329                 error("V-015", new Object[]{name});
1330             } else {
1331                 l.add(name);
1332                 dtdHandler.mixedElement(name);
1333             }
1334             strTmp.append(name);
1335             maybeWhitespace();
1336         }
1337 
1338         if (!peek("\u0029*")) // right paren
1339         {
1340             fatal("P-043", new Object[]{elementName, Character.valueOf(getc())});
1341         }
1342         if (in != start) {
1343             error("V-014", new Object[]{elementName});
1344         }
1345         strTmp.append(')');
1346 //        ChoiceModel cm = new ChoiceModel((Collection)l);
1347 //    cm.setRepeat(Repeat.ZERO_OR_MORE);
1348 //       element.setContentModel(cm);
1349     }
1350 
1351     private boolean maybeAttlistDecl()
1352             throws IOException, SAXException {
1353 
1354         // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1355         InputEntity start = peekDeclaration("!ATTLIST");
1356 
1357         if (start == null) {
1358             return false;
1359         }
1360 
1361         String elementName = getMarkupDeclname("F-016", true);
1362 //    Element    element = (Element) elements.get (name);
1363 
1364 //    if (element == null) {
1365 //        // not yet declared -- no problem.
1366 //        element = new Element(name);
1367 //        elements.put(name, element);
1368 //    }
1369 
1370         while (!peek(">")) {
1371 
1372             // [53] AttDef ::= S Name S AttType S DefaultDecl
1373             // [54] AttType ::= StringType | TokenizedType | EnumeratedType
1374 
1375             // look for global attribute definitions, don't expand for now...
1376             maybeWhitespace();
1377             char c = getc();
1378             if (c == '%') {
1379                 String entityName = maybeGetName();
1380                 if (entityName != null) {
1381                     nextChar(';', "F-021", entityName);
1382                     whitespace("F-021");
1383                     continue;
1384                 } else {
1385                     fatal("P-011");
1386                 }
1387             }
1388 
1389             ungetc();
1390             // look for attribute name otherwise
1391             String attName = maybeGetName();
1392             if (attName == null) {
1393                 fatal("P-044", new Object[]{Character.valueOf(getc())});
1394             }
1395             whitespace("F-001");
1396 
1397 ///        Attribute    a = new Attribute (name);
1398 
1399             String typeName;
1400             Vector values = null;    // notation/enumeration values
1401 
1402             // Note:  use the type constants from Attribute
1403             // so that "==" may be used (faster)
1404 
1405             // [55] StringType ::= 'CDATA'
1406             if (peek(TYPE_CDATA)) ///            a.setType(Attribute.CDATA);
1407             {
1408                 typeName = TYPE_CDATA;
1409             } // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
1410             //        | 'ENTITY' | 'ENTITIES'
1411             //        | 'NMTOKEN' | 'NMTOKENS'
1412             // n.b. if "IDREFS" is there, both "ID" and "IDREF"
1413             // match peekahead ... so this order matters!
1414             else if (peek(TYPE_IDREFS)) {
1415                 typeName = TYPE_IDREFS;
1416             } else if (peek(TYPE_IDREF)) {
1417                 typeName = TYPE_IDREF;
1418             } else if (peek(TYPE_ID)) {
1419                 typeName = TYPE_ID;
1420 // TODO: should implement this error check?
1421 ///        if (element.id() != null) {
1422 ///                    error ("V-016", new Object [] { element.id() });
1423 ///        } else
1424 ///            element.setId(name);
1425             } else if (peek(TYPE_ENTITY)) {
1426                 typeName = TYPE_ENTITY;
1427             } else if (peek(TYPE_ENTITIES)) {
1428                 typeName = TYPE_ENTITIES;
1429             } else if (peek(TYPE_NMTOKENS)) {
1430                 typeName = TYPE_NMTOKENS;
1431             } else if (peek(TYPE_NMTOKEN)) {
1432                 typeName = TYPE_NMTOKEN;
1433             } // [57] EnumeratedType ::= NotationType | Enumeration
1434             // [58] NotationType ::= 'NOTATION' S '(' S? Name
1435             //        (S? '|' S? Name)* S? ')'
1436             else if (peek(TYPE_NOTATION)) {
1437                 typeName = TYPE_NOTATION;
1438                 whitespace("F-002");
1439                 nextChar('(', "F-029", null);
1440                 maybeWhitespace();
1441 
1442                 values = new Vector();
1443                 do {
1444                     String name;
1445                     if ((name = maybeGetName()) == null) {
1446                         fatal("P-068");
1447                     }
1448                     // permit deferred declarations
1449                     if (notations.get(name) == null) {
1450                         notations.put(name, name);
1451                     }
1452                     values.addElement(name);
1453                     maybeWhitespace();
1454                     if (peek("|")) {
1455                         maybeWhitespace();
1456                     }
1457                 } while (!peek(")"));
1458 ///            a.setValues(new String [v.size ()]);
1459 ///            for (int i = 0; i < v.size (); i++)
1460 ///                a.setValue(i, (String)v.elementAt(i));
1461 
1462                 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
1463             } else if (peek("(")) {
1464 ///            a.setType(Attribute.ENUMERATION);
1465                 typeName = TYPE_ENUMERATION;
1466 
1467                 maybeWhitespace();
1468 
1469 ///            Vector v = new Vector ();
1470                 values = new Vector();
1471                 do {
1472                     String name = getNmtoken();
1473 ///                v.addElement (name);
1474                     values.addElement(name);
1475                     maybeWhitespace();
1476                     if (peek("|")) {
1477                         maybeWhitespace();
1478                     }
1479                 } while (!peek(")"));
1480 ///            a.setValues(new String [v.size ()]);
1481 ///            for (int i = 0; i < v.size (); i++)
1482 ///                a.setValue(i, (String)v.elementAt(i));
1483             } else {
1484                 fatal("P-045",
1485                         new Object[]{attName, Character.valueOf(getc())});
1486                 typeName = null;
1487             }
1488 
1489             short attributeUse;
1490             String defaultValue = null;
1491 
1492             // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1493             //        | (('#FIXED' S)? AttValue)
1494             whitespace("F-003");
1495             if (peek("#REQUIRED")) {
1496                 attributeUse = DTDEventListener.USE_REQUIRED;
1497             } ///            a.setIsRequired(true);
1498             else if (peek("#FIXED")) {
1499 ///            if (a.type() == Attribute.ID)
1500                 if (typeName == TYPE_ID) {
1501                     error("V-017", new Object[]{attName});
1502                 }
1503 ///            a.setIsFixed(true);
1504                 attributeUse = DTDEventListener.USE_FIXED;
1505                 whitespace("F-004");
1506                 parseLiteral(false);
1507 ///            if (a.type() != Attribute.CDATA)
1508 ///                a.setDefaultValue(normalize(false));
1509 ///            else
1510 ///                a.setDefaultValue(strTmp.toString());
1511 
1512                 if (typeName == TYPE_CDATA) {
1513                     defaultValue = normalize(false);
1514                 } else {
1515                     defaultValue = strTmp.toString();
1516                 }
1517 
1518 // TODO: implement this check
1519 ///            if (a.type() != Attribute.CDATA)
1520 ///                validateAttributeSyntax (a, a.defaultValue());
1521             } else if (!peek("#IMPLIED")) {
1522                 attributeUse = DTDEventListener.USE_IMPLIED;
1523 
1524 ///            if (a.type() == Attribute.ID)
1525                 if (typeName == TYPE_ID) {
1526                     error("V-018", new Object[]{attName});
1527                 }
1528                 parseLiteral(false);
1529 ///            if (a.type() != Attribute.CDATA)
1530 ///                a.setDefaultValue(normalize(false));
1531 ///            else
1532 ///                a.setDefaultValue(strTmp.toString());
1533                 if (typeName == TYPE_CDATA) {
1534                     defaultValue = normalize(false);
1535                 } else {
1536                     defaultValue = strTmp.toString();
1537                 }
1538 
1539 // TODO: implement this check
1540 ///            if (a.type() != Attribute.CDATA)
1541 ///                validateAttributeSyntax (a, a.defaultValue());
1542             } else {
1543                 // TODO: this looks like an fatal error.
1544                 attributeUse = DTDEventListener.USE_NORMAL;
1545             }
1546 
1547             if (XmlLang.equals(attName)
1548                     && defaultValue/* a.defaultValue()*/ != null
1549                     && !isXmlLang(defaultValue/*a.defaultValue()*/)) {
1550                 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});
1551             }
1552 
1553 // TODO: isn't it an error to specify the same attribute twice?
1554 ///        if (!element.attributes().contains(a)) {
1555 ///            element.addAttribute(a);
1556 ///            dtdHandler.attributeDecl(a);
1557 ///        }
1558 
1559             String[] v = (values != null) ? (String[]) values.toArray(new String[values.size()]) : null;
1560             dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
1561             maybeWhitespace();
1562         }
1563         if (start != in) {
1564             error("V-013", null);
1565         }
1566         return true;
1567     }
1568 
1569     // used when parsing literal attribute values,
1570     // or public identifiers.
1571     //
1572     // input in strTmp
1573     private String normalize(boolean invalidIfNeeded) {
1574 
1575         // this can allocate an extra string...
1576 
1577         String s = strTmp.toString();
1578         String s2 = s.trim();
1579         boolean didStrip = false;
1580 
1581         if (s != s2) {
1582             s = s2;
1583             didStrip = true;
1584         }
1585         strTmp = new StringBuffer();
1586         for (int i = 0; i < s.length(); i++) {
1587             char c = s.charAt(i);
1588             if (!XmlChars.isSpace(c)) {
1589                 strTmp.append(c);
1590                 continue;
1591             }
1592             strTmp.append(' ');
1593             while (++i < s.length() && XmlChars.isSpace(s.charAt(i))) {
1594                 didStrip = true;
1595             }
1596             i--;
1597         }
1598         if (didStrip) {
1599             return strTmp.toString();
1600         } else {
1601             return s;
1602         }
1603     }
1604 
1605     private boolean maybeConditionalSect()
1606             throws IOException, SAXException {
1607 
1608         // [61] conditionalSect ::= includeSect | ignoreSect
1609 
1610         if (!peek("<![")) {
1611             return false;
1612         }
1613 
1614         String keyword;
1615         InputEntity start = in;
1616 
1617         maybeWhitespace();
1618 
1619         if ((keyword = maybeGetName()) == null) {
1620             fatal("P-046");
1621         }
1622         maybeWhitespace();
1623         nextChar('[', "F-030", null);
1624 
1625         // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
1626         //                extSubsetDecl ']]>'
1627         if ("INCLUDE".equals(keyword)) {
1628             for (;;) {
1629                 while (in.isEOF() && in != start) {
1630                     in = in.pop();
1631                 }
1632                 if (in.isEOF()) {
1633                     error("V-020", null);
1634                 }
1635                 if (peek("]]>")) {
1636                     break;
1637                 }
1638 
1639                 doLexicalPE = false;
1640                 if (maybeWhitespace()) {
1641                     continue;
1642                 }
1643                 if (maybePEReference()) {
1644                     continue;
1645                 }
1646                 doLexicalPE = true;
1647                 if (maybeMarkupDecl() || maybeConditionalSect()) {
1648                     continue;
1649                 }
1650 
1651                 fatal("P-047");
1652             }
1653 
1654             // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
1655             //            ignoreSectcontents ']]>'
1656             // [64] ignoreSectcontents ::= Ignore ('<!['
1657             //            ignoreSectcontents ']]>' Ignore)*
1658             // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
1659         } else if ("IGNORE".equals(keyword)) {
1660             int nestlevel = 1;
1661             // ignoreSectcontents
1662             doLexicalPE = false;
1663             while (nestlevel > 0) {
1664                 char c = getc();    // will pop input entities
1665                 if (c == '<') {
1666                     if (peek("![")) {
1667                         nestlevel++;
1668                     }
1669                 } else if (c == ']') {
1670                     if (peek("]>")) {
1671                         nestlevel--;
1672                     }
1673                 } else {
1674                     continue;
1675                 }
1676             }
1677         } else {
1678             fatal("P-048", new Object[]{keyword});
1679         }
1680         return true;
1681     }
1682 
1683     //
1684     // CHAPTER 4:  Physical Structures
1685     //
1686     // parse decimal or hex numeric character reference
1687     private int parseCharNumber()
1688             throws IOException, SAXException {
1689 
1690         char c;
1691         int retval = 0;
1692 
1693         // n.b. we ignore overflow ...
1694         if (getc() != 'x') {
1695             ungetc();
1696             for (;;) {
1697                 c = getc();
1698                 if (c >= '0' && c <= '9') {
1699                     retval *= 10;
1700                     retval += (c - '0');
1701                     continue;
1702                 }
1703                 if (c == ';') {
1704                     return retval;
1705                 }
1706                 fatal("P-049");
1707             }
1708         } else {
1709             for (;;) {
1710                 c = getc();
1711                 if (c >= '0' && c <= '9') {
1712                     retval <<= 4;
1713                     retval += (c - '0');
1714                     continue;
1715                 }
1716                 if (c >= 'a' && c <= 'f') {
1717                     retval <<= 4;
1718                     retval += 10 + (c - 'a');
1719                     continue;
1720                 }
1721                 if (c >= 'A' && c <= 'F') {
1722                     retval <<= 4;
1723                     retval += 10 + (c - 'A');
1724                     continue;
1725                 }
1726                 if (c == ';') {
1727                     return retval;
1728                 }
1729                 fatal("P-050");
1730             }
1731         }
1732     }
1733 
1734     // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
1735     // though still subject to the 'Char' construct in XML
1736     private int surrogatesToCharTmp(int ucs4)
1737             throws SAXException {
1738 
1739         if (ucs4 <= 0xffff) {
1740             if (XmlChars.isChar(ucs4)) {
1741                 charTmp[0] = (char) ucs4;
1742                 return 1;
1743             }
1744         } else if (ucs4 <= 0x0010ffff) {
1745             // we represent these as UNICODE surrogate pairs
1746             ucs4 -= 0x10000;
1747             charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
1748             charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
1749             return 2;
1750         }
1751         fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
1752         // NOTREACHED
1753         return -1;
1754     }
1755 
1756     private boolean maybePEReference()
1757             throws IOException, SAXException {
1758 
1759         // This is the SYNTACTIC version of this construct.
1760         // When processing external entities, there is also
1761         // a LEXICAL version; see getc() and doLexicalPE.
1762 
1763         // [69] PEReference ::= '%' Name ';'
1764         if (!in.peekc('%')) {
1765             return false;
1766         }
1767 
1768         String name = maybeGetName();
1769         Object entity;
1770 
1771         if (name == null) {
1772             fatal("P-011");
1773         }
1774         nextChar(';', "F-021", name);
1775         entity = params.get(name);
1776 
1777         if (entity instanceof InternalEntity) {
1778             InternalEntity value = (InternalEntity) entity;
1779             pushReader(value.buf, name, false);
1780 
1781         } else if (entity instanceof ExternalEntity) {
1782             pushReader((ExternalEntity) entity);
1783             externalParameterEntity((ExternalEntity) entity);
1784 
1785         } else if (entity == null) {
1786             error("V-022", new Object[]{name});
1787         }
1788         return true;
1789     }
1790 
    /**
     * Parses an entity declaration if one starts at the current position.
     * <p>
     * Only the first declaration of a given name is stored; a repeated
     * declaration of a general entity produces warning P-054.  Stored
     * general entities are reported to {@code dtdHandler} as internal,
     * external or unparsed entity declarations; parameter entities are
     * stored in {@code params} without a handler callback.
     *
     * @return {@code false} if {@code peekDeclaration("!ENTITY")} finds no
     *         declaration here, {@code true} once one has been parsed
     * @throws IOException  propagated from the underlying input entity
     * @throws SAXException on malformed declarations, or when a handler
     *                      rejects a reported warning or error
     */
    private boolean maybeEntityDecl()
            throws IOException, SAXException {

        // [70] EntityDecl ::= GEDecl | PEDecl
        // [71] GEDecl ::= '<!ENTITY' S       Name S EntityDef S? '>'
        // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF     S? '>'
        // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
        // [74] PEDef     ::= EntityValue |  ExternalID
        //
        InputEntity start = peekDeclaration("!ENTITY");

        if (start == null) {
            return false;
        }

        String entityName;
        SimpleHashtable defns;      // params for a PEDecl, entities for a GEDecl
        ExternalEntity externalId;
        boolean doStore;            // false when the name is already declared

        // PE expansion gets selectively turned off several places:
        // in ENTITY declarations (here), in comments, in PIs.

        // Here, we allow PE entities to be declared, and allow
        // literals to include PE refs without the added spaces
        // required with their expansion in markup decls.

        doLexicalPE = false;
        whitespace("F-005");
        // a '%' after the keyword marks a parameter entity declaration
        if (in.peekc('%')) {
            whitespace("F-006");
            defns = params;
        } else {
            defns = entities;
        }

        ungetc();    // leave some whitespace
        doLexicalPE = true;
        entityName = getMarkupDeclname("F-017", false);
        whitespace("F-007");
        externalId = maybeExternalID();

        //
        // first definition sticks ... e.g. internal subset PEs are used
        // to override DTD defaults.  It's also an "error" to incorrectly
        // redefine builtin internal entities, but since reporting such
        // errors is optional we only give warnings ("just in case") for
        // non-parameter entities.
        //
        doStore = (defns.get(entityName) == null);
        if (!doStore && defns == entities) {
            warning("P-054", new Object[]{entityName});
        }

        // internal entities
        if (externalId == null) {
            char value[];
            InternalEntity entity;

            doLexicalPE = false;        // "ab%bar;cd" -maybe-> "abcd"
            parseLiteral(true);
            doLexicalPE = true;
            // the literal is always parsed, even when the duplicate
            // declaration is going to be discarded
            if (doStore) {
                // copy the literal's text out of strTmp
                value = new char[strTmp.length()];
                if (value.length != 0) {
                    strTmp.getChars(0, value.length, value, 0);
                }
                entity = new InternalEntity(entityName, value);
                entity.isPE = (defns == params);
                defns.put(entityName, entity);
                if (defns == entities) {
                    dtdHandler.internalGeneralEntityDecl(entityName,
                            new String(value));
                }
            }

            // external entities (including unparsed)
        } else {
            // [76] NDataDecl ::= S 'NDATA' S Name
            // NDATA is only looked for on general entities
            if (defns == entities && maybeWhitespace()
                    && peek("NDATA")) {
                externalId.notation = getMarkupDeclname("F-018", false);

                // flag undeclared notation for checking after
                // the DTD is fully processed
                if (notations.get(externalId.notation) == null) {
                    notations.put(externalId.notation, Boolean.TRUE);
                }
            }
            externalId.name = entityName;
            externalId.isPE = (defns == params);
            if (doStore) {
                defns.put(entityName, externalId);
                // a notation makes this an unparsed entity; otherwise
                // only general entities are reported to the handler
                if (externalId.notation != null) {
                    dtdHandler.unparsedEntityDecl(entityName,
                            externalId.publicId, externalId.systemId,
                            externalId.notation);
                } else if (defns == entities) {
                    dtdHandler.externalGeneralEntityDecl(entityName,
                            externalId.publicId, externalId.systemId);
                }
            }
        }
        maybeWhitespace();
        nextChar('>', "F-031", entityName);
        // the declaration must end in the entity in which it started
        if (start != in) {
            error("V-013", null);
        }
        return true;
    }
1901 
1902     private ExternalEntity maybeExternalID()
1903             throws IOException, SAXException {
1904 
1905         // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1906         //        | 'PUBLIC' S' PubidLiteral S Systemliteral
1907         String temp = null;
1908         ExternalEntity retval;
1909 
1910         if (peek("PUBLIC")) {
1911             whitespace("F-009");
1912             temp = parsePublicId();
1913         } else if (!peek("SYSTEM")) {
1914             return null;
1915         }
1916 
1917         retval = new ExternalEntity(in);
1918         retval.publicId = temp;
1919         whitespace("F-008");
1920         retval.systemId = parseSystemId();
1921         return retval;
1922     }
1923 
1924     private String parseSystemId()
1925             throws IOException, SAXException {
1926 
1927         String uri = getQuotedString("F-034", null);
1928         int temp = uri.indexOf(':');
1929 
1930         // resolve relative URIs ... must do it here since
1931         // it's relative to the source file holding the URI!
1932 
1933         // "new java.net.URL (URL, string)" conforms to RFC 1630,
1934         // but we can't use that except when the URI is a URL.
1935         // The entity resolver is allowed to handle URIs that are
1936         // not URLs, so we pass URIs through with scheme intact
1937         if (temp == -1 || uri.indexOf('/') < temp) {
1938             String baseURI;
1939 
1940             baseURI = in.getSystemId();
1941             if (baseURI == null) {
1942                 fatal("P-055", new Object[]{uri});
1943             }
1944             if (uri.length() == 0) {
1945                 uri = ".";
1946             }
1947             baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
1948             if (uri.charAt(0) != '/') {
1949                 uri = baseURI + uri;
1950             } else {
1951                 // XXX slashes at the beginning of a relative URI are
1952                 // a special case we don't handle.
1953                 throw new InternalError();
1954             }
1955 
1956             // letting other code map any "/xxx/../" or "/./" to "/",
1957             // since all URIs must handle it the same.
1958         }
1959         // check for fragment ID in URI
1960         if (uri.indexOf('#') != -1) {
1961             error("P-056", new Object[]{uri});
1962         }
1963         return uri;
1964     }
1965 
1966     private void maybeTextDecl()
1967             throws IOException, SAXException {
1968 
1969         // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1970         if (peek("<?xml")) {
1971             readVersion(false, "1.0");
1972             readEncoding(true);
1973             maybeWhitespace();
1974             if (!peek("?>")) {
1975                 fatal("P-057");
1976             }
1977         }
1978     }
1979 
1980     private void externalParameterEntity(ExternalEntity next)
1981             throws IOException, SAXException {
1982 
1983         //
1984         // Reap the intended benefits of standalone declarations:
1985         // don't deal with external parameter entities, except to
1986         // validate the standalone declaration.
1987         //
1988 
1989         // n.b. "in external parameter entities" (and external
1990         // DTD subset, same grammar) parameter references can
1991         // occur "within" markup declarations ... expansions can
1992         // cross syntax rules.  Flagged here; affects getc().
1993 
1994         // [79] ExtPE ::= TextDecl? extSubsetDecl
1995         // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
1996         //        | PEReference | S )*
1997         InputEntity pe;
1998 
1999         // XXX if this returns false ...
2000 
2001         pe = in;
2002         maybeTextDecl();
2003         while (!pe.isEOF()) {
2004             // pop internal PEs (and whitespace before/after)
2005             if (in.isEOF()) {
2006                 in = in.pop();
2007                 continue;
2008             }
2009             doLexicalPE = false;
2010             if (maybeWhitespace()) {
2011                 continue;
2012             }
2013             if (maybePEReference()) {
2014                 continue;
2015             }
2016             doLexicalPE = true;
2017             if (maybeMarkupDecl() || maybeConditionalSect()) {
2018                 continue;
2019             }
2020             break;
2021         }
2022         // if (in != pe) throw new InternalError("who popped my PE?");
2023         if (!pe.isEOF()) {
2024             fatal("P-059", new Object[]{in.getName()});
2025         }
2026     }
2027 
2028     private void readEncoding(boolean must)
2029             throws IOException, SAXException {
2030 
2031         // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2032         String name = maybeReadAttribute("encoding", must);
2033 
2034         if (name == null) {
2035             return;
2036         }
2037         for (int i = 0; i < name.length(); i++) {
2038             char c = name.charAt(i);
2039             if ((c >= 'A' && c <= 'Z')
2040                     || (c >= 'a' && c <= 'z')) {
2041                 continue;
2042             }
2043             if (i != 0
2044                     && ((c >= '0' && c <= '9')
2045                     || c == '-'
2046                     || c == '_'
2047                     || c == '.')) {
2048                 continue;
2049             }
2050             fatal("P-060", new Object[]{Character.valueOf(c)});
2051         }
2052 
2053         //
2054         // This should be the encoding in use, and it's even an error for
2055         // it to be anything else (in certain cases that are impractical to
2056         // to test, and may even be insufficient).  So, we do the best we
2057         // can, and warn if things look suspicious.  Note that Java doesn't
2058         // uniformly expose the encodings, and that the names it uses
2059         // internally are nonstandard.  Also, that the XML spec allows
2060         // such "errors" not to be reported at all.
2061         //
2062         String currentEncoding = in.getEncoding();
2063 
2064         if (currentEncoding != null
2065                 && !name.equalsIgnoreCase(currentEncoding)) {
2066             warning("P-061", new Object[]{name, currentEncoding});
2067         }
2068     }
2069 
2070     private boolean maybeNotationDecl()
2071             throws IOException, SAXException {
2072 
2073         // [82] NotationDecl ::= '<!NOTATION' S Name S
2074         //        (ExternalID | PublicID) S? '>'
2075         // [83] PublicID ::= 'PUBLIC' S PubidLiteral
2076         InputEntity start = peekDeclaration("!NOTATION");
2077 
2078         if (start == null) {
2079             return false;
2080         }
2081 
2082         String name = getMarkupDeclname("F-019", false);
2083         ExternalEntity entity = new ExternalEntity(in);
2084 
2085         whitespace("F-011");
2086         if (peek("PUBLIC")) {
2087             whitespace("F-009");
2088             entity.publicId = parsePublicId();
2089             if (maybeWhitespace()) {
2090                 if (!peek(">")) {
2091                     entity.systemId = parseSystemId();
2092                 } else {
2093                     ungetc();
2094                 }
2095             }
2096         } else if (peek("SYSTEM")) {
2097             whitespace("F-008");
2098             entity.systemId = parseSystemId();
2099         } else {
2100             fatal("P-062");
2101         }
2102         maybeWhitespace();
2103         nextChar('>', "F-032", name);
2104         if (start != in) {
2105             error("V-013", null);
2106         }
2107         if (entity.systemId != null && entity.systemId.indexOf('#') != -1) {
2108             error("P-056", new Object[]{entity.systemId});
2109         }
2110 
2111         Object value = notations.get(name);
2112         if (value != null && value instanceof ExternalEntity) {
2113             warning("P-063", new Object[]{name});
2114         } else {
2115             notations.put(name, entity);
2116             dtdHandler.notationDecl(name, entity.publicId,
2117                     entity.systemId);
2118         }
2119         return true;
2120     }
2121 
2122     ////////////////////////////////////////////////////////////////
2123     //
2124     //    UTILITIES
2125     //
2126     ////////////////////////////////////////////////////////////////
    /**
     * Returns the next character of input.
     * <p>
     * With {@code doLexicalPE} off this is simply {@code in.getc()}.  With
     * it on, exhausted input entities are popped first, and a {@code '%'}
     * is treated as the start of a parameter entity reference whose
     * replacement is pushed as new input, bracketed by single-space
     * readers; the character returned is then the first one read from the
     * newly pushed input.
     *
     * @return the next character
     * @throws IOException  propagated from the underlying input entity
     * @throws SAXException P-064 at the end of an entity that may not be
     *                      popped, P-011 when no name follows {@code '%'},
     *                      V-022 when the referenced entity is undeclared
     */
    private char getc() throws IOException, SAXException {

        // fast path: no lexical PE processing requested
        if (!doLexicalPE) {
            char c = in.getc();
            return c;
        }

        //
        // External parameter entities get funky processing of '%param;'
        // references.  It's not clearly defined in the XML spec; but it
        // boils down to having those refs be _lexical_ in most cases to
        // include partial syntax productions.  It also needs selective
        // enabling; "<!ENTITY % foo ...>" must work, for example, and
        // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
        // if it's expanded in a literal, else "ab  cd".  PEs also do
        // not expand within comments or PIs, and external PEs are only
        // allowed to have markup decls (and so aren't handled lexically).
        //
        // This PE handling should be merged into maybeWhitespace, where
        // it can be dealt with more consistently.
        //
        // Also, there are some validity constraints in this area.
        //
        char c;

        // Drop exhausted entities.  doLexicalPE is known to be true at
        // this point, so any entity other than the document is popped.
        while (in.isEOF()) {
            if (in.isInternal() || (doLexicalPE && !in.isDocument())) {
                in = in.pop();
            } else {
                fatal("P-064", new Object[]{in.getName()});
            }
        }
        if ((c = in.getc()) == '%' && doLexicalPE) {
            // PE ref ::= '%' name ';'
            String name = maybeGetName();
            Object entity;

            if (name == null) {
                fatal("P-011");
            }
            nextChar(';', "F-021", name);
            entity = params.get(name);

            // push a magic "entity" before and after the
            // real one, so ungetc() behaves uniformly
            pushReader(" ".toCharArray(), null, false);
            if (entity instanceof InternalEntity) {
                pushReader(((InternalEntity) entity).buf, name, false);
            } else if (entity instanceof ExternalEntity) // PEs can't be unparsed!
            // XXX if this returns false ...
            {
                pushReader((ExternalEntity) entity);
            } else if (entity == null) // see note in maybePEReference re making this be nonfatal.
            {
                fatal("V-022");
            } else {
                throw new InternalError();
            }
            // pushed last, so this space reader is the current input
            pushReader(" ".toCharArray(), null, false);
            return in.getc();
        }
        return c;
    }
2190 
2191     private void ungetc() {
2192 
2193         in.ungetc();
2194     }
2195 
2196     private boolean peek(String s)
2197             throws IOException, SAXException {
2198 
2199         return in.peek(s, null);
2200     }
2201 
2202     // Return the entity starting the specified declaration
2203     // (for validating declaration nesting) else null.
2204     private InputEntity peekDeclaration(String s)
2205             throws IOException, SAXException {
2206 
2207         InputEntity start;
2208 
2209         if (!in.peekc('<')) {
2210             return null;
2211         }
2212         start = in;
2213         if (in.peek(s, null)) {
2214             return start;
2215         }
2216         in.ungetc();
2217         return null;
2218     }
2219 
2220     private void nextChar(char c, String location, String near)
2221             throws IOException, SAXException {
2222 
2223         while (in.isEOF() && !in.isDocument()) {
2224             in = in.pop();
2225         }
2226         if (!in.peekc(c)) {
2227             fatal("P-008", new Object[]{Character.valueOf(c),
2228                         messages.getMessage(locale, location),
2229                         (near == null ? "" : ('"' + near + '"'))});
2230         }
2231     }
2232 
2233     private void pushReader(char buf[], String name, boolean isGeneral)
2234             throws SAXException {
2235 
2236         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2237         r.init(buf, name, in, !isGeneral);
2238         in = r;
2239     }
2240 
2241     private boolean pushReader(ExternalEntity next)
2242             throws IOException, SAXException {
2243 
2244         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2245         InputSource s;
2246         try {
2247             s = next.getInputSource(resolver);
2248         } catch (IOException e) {
2249             String msg =
2250                     "unable to open the external entity from :" + next.systemId;
2251             if (next.publicId != null) {
2252                 msg += " (public id:" + next.publicId + ")";
2253             }
2254 
2255             SAXParseException spe = new SAXParseException(msg,
2256                     getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
2257             dtdHandler.fatalError(spe);
2258             throw e;
2259         }
2260 
2261         r.init(s, next.name, in, next.isPE);
2262         in = r;
2263         return true;
2264     }
2265 
2266     public String getPublicId() {
2267 
2268         return (in == null) ? null : in.getPublicId();
2269     }
2270 
2271     public String getSystemId() {
2272 
2273         return (in == null) ? null : in.getSystemId();
2274     }
2275 
2276     public int getLineNumber() {
2277 
2278         return (in == null) ? -1 : in.getLineNumber();
2279     }
2280 
2281     public int getColumnNumber() {
2282 
2283         return (in == null) ? -1 : in.getColumnNumber();
2284     }
2285 
2286     // error handling convenience routines
2287     private void warning(String messageId, Object parameters[])
2288             throws SAXException {
2289 
2290         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2291                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2292 
2293         dtdHandler.warning(e);
2294     }
2295 
2296     void error(String messageId, Object parameters[])
2297             throws SAXException {
2298 
2299         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2300                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2301 
2302         dtdHandler.error(e);
2303     }
2304 
2305     private void fatal(String messageId) throws SAXException {
2306 
2307         fatal(messageId, null);
2308     }
2309 
2310     private void fatal(String messageId, Object parameters[])
2311             throws SAXException {
2312 
2313         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2314                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2315 
2316         dtdHandler.fatalError(e);
2317 
2318         throw e;
2319     }
2320 
2321     //
2322     // Map char arrays to strings ... cuts down both on memory and
2323     // CPU usage for element/attribute/other names that are reused.
2324     //
2325     // Documents typically repeat names a lot, so we more or less
2326     // intern all the strings within the document; since some strings
2327     // are repeated in multiple documents (e.g. stylesheets) we go
2328     // a bit further, and intern globally.
2329     //
2330     static class NameCache {
2331         //
2332         // Unless we auto-grow this, the default size should be a
2333         // reasonable bit larger than needed for most XML files
2334         // we've yet seen (and be prime).  If it's too small, the
2335         // penalty is just excess cache collisions.
2336         //
2337 
2338         NameCacheEntry hashtable[] = new NameCacheEntry[541];
2339 
2340         //
2341         // Usually we just want to get the 'symbol' for these chars
2342         //
2343         String lookup(char value[], int len) {
2344 
2345             return lookupEntry(value, len).name;
2346         }
2347 
2348         //
2349         // Sometimes we need to scan the chars in the resulting
2350         // string, so there's an accessor which exposes them.
2351         // (Mostly for element end tags.)
2352         //
2353         NameCacheEntry lookupEntry(char value[], int len) {
2354 
2355             int index = 0;
2356             NameCacheEntry entry;
2357 
2358             // hashing to get index
2359             for (int i = 0; i < len; i++) {
2360                 index = index * 31 + value[i];
2361             }
2362             index &= 0x7fffffff;
2363             index %= hashtable.length;
2364 
2365             // return entry if one's there ...
2366             for (entry = hashtable[index];
2367                     entry != null;
2368                     entry = entry.next) {
2369                 if (entry.matches(value, len)) {
2370                     return entry;
2371                 }
2372             }
2373 
2374             // else create new one
2375             entry = new NameCacheEntry();
2376             entry.chars = new char[len];
2377             System.arraycopy(value, 0, entry.chars, 0, len);
2378             entry.name = new String(entry.chars);
2379             //
2380             // NOTE:  JDK 1.1 has a fixed size string intern table,
2381             // with non-GC'd entries.  It can panic here; that's a
2382             // JDK problem, use 1.2 or later with many identifiers.
2383             //
2384             entry.name = entry.name.intern();        // "global" intern
2385             entry.next = hashtable[index];
2386             hashtable[index] = entry;
2387             return entry;
2388         }
2389     }
2390 
2391     static class NameCacheEntry {
2392 
2393         String name;
2394         char chars[];
2395         NameCacheEntry next;
2396 
2397         boolean matches(char value[], int len) {
2398             if (chars == null || chars.length != len) {
2399                 return false;
2400             }
2401             for (int i = 0; i < len; i++) {
2402                 if (value[i] != chars[i]) {
2403                     return false;
2404                 }
2405             }
2406             return true;
2407         }
2408     }
2409 
2410     //
2411     // Message catalog for diagnostics.
2412     //
2413     static final Catalog messages = new Catalog();
2414 
2415     static final class Catalog extends MessageCatalog {
2416 
2417         Catalog() {
2418             super(DTDParser.class);
2419         }
2420     }
2421 }