1 /*
   2  * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.dtdparser;
  27 
  28 import org.xml.sax.EntityResolver;
  29 import org.xml.sax.InputSource;
  30 import org.xml.sax.Locator;
  31 import org.xml.sax.SAXException;
  32 import org.xml.sax.SAXParseException;
  33 
  34 import java.io.IOException;
  35 import java.util.ArrayList;
  36 import java.util.Enumeration;
  37 import java.util.Hashtable;
  38 import java.util.Locale;
  39 import java.util.Set;
  40 import java.util.Vector;
  41 
  42 /**
  43  * This implements parsing of XML 1.0 DTDs.
  44  * <p/>
  45  * This conforms to the portion of the XML 1.0 specification related
  46  * to the external DTD subset.
  47  * <p/>
  48  * For multi-language applications (such as web servers using XML
  49  * processing to create dynamic content), a method supports choosing
  50  * a locale for parser diagnostics which is both understood by the
  51  * message recipient and supported by the parser.
  52  * <p/>
  53  * This parser produces a stream of parse events.  It supports some
  54  * features (exposing comments, CDATA sections, and entity references)
  55  * which are not required to be reported by conformant XML processors.
  56  *
  57  * @author David Brownell
  58  * @author Janet Koenig
  59  * @author Kohsuke KAWAGUCHI
  60  * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $
  61  */
  62 public class DTDParser {
  63     public final static String TYPE_CDATA = "CDATA";
  64     public final static String TYPE_ID = "ID";
  65     public final static String TYPE_IDREF = "IDREF";
  66     public final static String TYPE_IDREFS = "IDREFS";
  67     public final static String TYPE_ENTITY = "ENTITY";
  68     public final static String TYPE_ENTITIES = "ENTITIES";
  69     public final static String TYPE_NMTOKEN = "NMTOKEN";
  70     public final static String TYPE_NMTOKENS = "NMTOKENS";
  71     public final static String TYPE_NOTATION = "NOTATION";
  72     public final static String TYPE_ENUMERATION = "ENUMERATION";
  73 
  74 
  75     // stack of input entities being merged
  76     private InputEntity in;
  77 
  78     // temporaries reused during parsing
  79     private StringBuffer strTmp;
  80     private char nameTmp [];
  81     private NameCache nameCache;
  82     private char charTmp [] = new char[2];
  83 
  84     // temporary DTD parsing state
  85     private boolean doLexicalPE;
  86 
  87     // DTD state, used during parsing
  88 //    private SimpleHashtable    elements = new SimpleHashtable (47);
  89     protected final Set declaredElements = new java.util.HashSet();
  90     private SimpleHashtable params = new SimpleHashtable(7);
  91 
  92     // exposed to package-private subclass
  93     Hashtable notations = new Hashtable(7);
  94     SimpleHashtable entities = new SimpleHashtable(17);
  95 
  96     private SimpleHashtable ids = new SimpleHashtable();
  97 
  98     // listeners for DTD parsing events
  99     private DTDEventListener dtdHandler;
 100 
 101     private EntityResolver resolver;
 102     private Locale locale;
 103 
 104     // string constants -- use these copies so "==" works
 105     // package private
 106     static final String strANY = "ANY";
 107     static final String strEMPTY = "EMPTY";
 108 
 109     /**
 110      * Used by applications to request locale for diagnostics.
 111      *
 112      * @param l The locale to use, or null to use system defaults
 113      *          (which may include only message IDs).
 114      */
 115     public void setLocale(Locale l) throws SAXException {
 116 
 117         if (l != null && !messages.isLocaleSupported(l.toString())) {
 118             throw new SAXException(messages.getMessage(locale,
 119                     "P-078", new Object[]{l}));
 120         }
 121         locale = l;
 122     }
 123 
 124     /**
 125      * Returns the diagnostic locale.
 126      */
 127     public Locale getLocale() {
 128         return locale;
 129     }
 130 
 131     /**
 132      * Chooses a client locale to use for diagnostics, using the first
 133      * language specified in the list that is supported by this parser.
 134      * That locale is then set using <a href="#setLocale(java.util.Locale)">
 135      * setLocale()</a>.  Such a list could be provided by a variety of user
 136      * preference mechanisms, including the HTTP <em>Accept-Language</em>
 137      * header field.
 138      *
 139      * @param languages Array of language specifiers, ordered with the most
 140      *                  preferable one at the front.  For example, "en-ca" then "fr-ca",
 141      *                  followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
 142      * @return The chosen locale, or null.
 143      * @see MessageCatalog
 144      */
 145     public Locale chooseLocale(String languages [])
 146             throws SAXException {
 147 
 148         Locale l = messages.chooseLocale(languages);
 149 
 150         if (l != null) {
 151             setLocale(l);
 152         }
 153         return l;
 154     }
 155 
 156     /**
 157      * Lets applications control entity resolution.
 158      */
 159     public void setEntityResolver(EntityResolver r) {
 160 
 161         resolver = r;
 162     }
 163 
 164     /**
 165      * Returns the object used to resolve entities
 166      */
 167     public EntityResolver getEntityResolver() {
 168 
 169         return resolver;
 170     }
 171 
 172     /**
 173      * Used by applications to set handling of DTD parsing events.
 174      */
 175     public void setDtdHandler(DTDEventListener handler) {
 176         dtdHandler = handler;
 177         if (handler != null)
 178             handler.setDocumentLocator(new Locator() {
 179                 public String getPublicId() {
 180                     return DTDParser.this.getPublicId();
 181                 }
 182 
 183                 public String getSystemId() {
 184                     return DTDParser.this.getSystemId();
 185                 }
 186 
 187                 public int getLineNumber() {
 188                     return DTDParser.this.getLineNumber();
 189                 }
 190 
 191                 public int getColumnNumber() {
 192                     return DTDParser.this.getColumnNumber();
 193                 }
 194             });
 195     }
 196 
 197     /**
 198      * Returns the handler used to for DTD parsing events.
 199      */
 200     public DTDEventListener getDtdHandler() {
 201         return dtdHandler;
 202     }
 203 
 204     /**
 205      * Parse a DTD.
 206      */
 207     public void parse(InputSource in)
 208             throws IOException, SAXException {
 209         init();
 210         parseInternal(in);
 211     }
 212 
 213     /**
 214      * Parse a DTD.
 215      */
 216     public void parse(String uri)
 217             throws IOException, SAXException {
 218         InputSource in;
 219 
 220         init();
 221         // System.out.println ("parse (\"" + uri + "\")");
 222         in = resolver.resolveEntity(null, uri);
 223 
 224         // If custom resolver punts resolution to parser, handle it ...
 225         if (in == null) {
 226             in = Resolver.createInputSource(new java.net.URL(uri), false);
 227 
 228             // ... or if custom resolver doesn't correctly construct the
 229             // input entity, patch it up enough so relative URIs work, and
 230             // issue a warning to minimize later confusion.
 231         } else if (in.getSystemId() == null) {
 232             warning("P-065", null);
 233             in.setSystemId(uri);
 234         }
 235 
 236         parseInternal(in);
 237     }
 238 
 239     // makes sure the parser is reset to "before a document"
 240     private void init() {
 241         in = null;
 242 
 243         // alloc temporary data used in parsing
 244         strTmp = new StringBuffer();
 245         nameTmp = new char[20];
 246         nameCache = new NameCache();
 247 
 248         // reset doc info
 249 //        isInAttribute = false;
 250 
 251         doLexicalPE = false;
 252 
 253         entities.clear();
 254         notations.clear();
 255         params.clear();
 256         //    elements.clear ();
 257         declaredElements.clear();
 258 
 259         // initialize predefined references ... re-interpreted later
 260         builtin("amp", "&");
 261         builtin("lt", "<");
 262         builtin("gt", ">");
 263         builtin("quot", "\"");
 264         builtin("apos", "'");
 265 
 266         if (locale == null)
 267             locale = Locale.getDefault();
 268         if (resolver == null)
 269             resolver = new Resolver();
 270         if (dtdHandler == null)
 271             dtdHandler = new DTDHandlerBase();
 272     }
 273 
 274     private void builtin(String entityName, String entityValue) {
 275         InternalEntity entity;
 276         entity = new InternalEntity(entityName, entityValue.toCharArray());
 277         entities.put(entityName, entity);
 278     }
 279 
 280 
 281     ////////////////////////////////////////////////////////////////
 282     //
 283     // parsing is by recursive descent, code roughly
 284     // following the BNF rules except tweaked for simple
 285     // lookahead.  rules are more or less in numeric order,
 286     // except where code sharing suggests other structures.
 287     //
 288     // a classic benefit of recursive descent parsers:  it's
 289     // relatively easy to get diagnostics that make sense.
 290     //
 291     ////////////////////////////////////////////////////////////////
 292 
 293 
 294     private void parseInternal(InputSource input)
 295             throws IOException, SAXException {
 296 
 297         if (input == null)
 298             fatal("P-000");
 299 
 300         try {
 301             in = InputEntity.getInputEntity(dtdHandler, locale);
 302             in.init(input, null, null, false);
 303 
 304             dtdHandler.startDTD(in);
 305 
 306             // [30] extSubset ::= TextDecl? extSubsetDecl
 307             // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
 308             //        | PEReference | S )*
 309             //    ... same as [79] extPE, which is where the code is
 310 
 311             ExternalEntity externalSubset = new ExternalEntity(in);
 312             externalParameterEntity(externalSubset);
 313 
 314             if (!in.isEOF()) {
 315                 fatal("P-001", new Object[]
 316                 {Integer.toHexString(((int) getc()))});
 317             }
 318             afterRoot();
 319             dtdHandler.endDTD();
 320 
 321         } catch (EndOfInputException e) {
 322             if (!in.isDocument()) {
 323                 String name = in.getName();
 324                 do {    // force a relevant URI and line number
 325                     in = in.pop();
 326                 } while (in.isInternal());
 327                 fatal("P-002", new Object[]{name});
 328             } else {
 329                 fatal("P-003", null);
 330             }
 331         } catch (RuntimeException e) {
 332             // Don't discard location that triggered the exception
 333             // ## Should properly wrap exception
 334             System.err.print("Internal DTD parser error: "); // ##
 335             e.printStackTrace();
 336             throw new SAXParseException(e.getMessage() != null
 337                     ? e.getMessage() : e.getClass().getName(),
 338                     getPublicId(), getSystemId(),
 339                     getLineNumber(), getColumnNumber());
 340 
 341         } finally {
 342             // recycle temporary data used during parsing
 343             strTmp = null;
 344             nameTmp = null;
 345             nameCache = null;
 346 
 347             // ditto input sources etc
 348             if (in != null) {
 349                 in.close();
 350                 in = null;
 351             }
 352 
 353             // get rid of all DTD info ... some of it would be
 354             // useful for editors etc, investigate later.
 355 
 356             params.clear();
 357             entities.clear();
 358             notations.clear();
 359             declaredElements.clear();
 360 //        elements.clear();
 361             ids.clear();
 362         }
 363     }
 364 
 365     void afterRoot() throws SAXException {
 366         // Make sure all IDREFs match declared ID attributes.  We scan
 367         // after the document element is parsed, since XML allows forward
 368         // references, and only now can we know if they're all resolved.
 369 
 370         for (Enumeration e = ids.keys();
 371              e.hasMoreElements();
 372                 ) {
 373             String id = (String) e.nextElement();
 374             Boolean value = (Boolean) ids.get(id);
 375             if (Boolean.FALSE == value)
 376                 error("V-024", new Object[]{id});
 377         }
 378     }
 379 
 380 
 381     // role is for diagnostics
 382     private void whitespace(String roleId)
 383             throws IOException, SAXException {
 384 
 385         // [3] S ::= (#x20 | #x9 | #xd | #xa)+
 386         if (!maybeWhitespace()) {
 387             fatal("P-004", new Object[]
 388             {messages.getMessage(locale, roleId)});
 389         }
 390     }
 391 
 392     // S?
 393     private boolean maybeWhitespace()
 394             throws IOException, SAXException {
 395 
 396         if (!doLexicalPE)
 397             return in.maybeWhitespace();
 398 
 399         // see getc() for the PE logic -- this lets us splice
 400         // expansions of PEs in "anywhere".  getc() has smarts,
 401         // so for external PEs we don't bypass it.
 402 
 403         // XXX we can marginally speed PE handling, and certainly
 404         // be cleaner (hence potentially more correct), by using
 405         // the observations that expanded PEs only start and stop
 406         // where whitespace is allowed.  getc wouldn't need any
 407         // "lexical" PE expansion logic, and no other method needs
 408         // to handle termination of PEs.  (parsing of literals would
 409         // still need to pop entities, but not parsing of references
 410         // in content.)
 411 
 412         char c = getc();
 413         boolean saw = false;
 414 
 415         while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
 416             saw = true;
 417 
 418             // this gracefully ends things when we stop playing
 419             // with internal parameters.  caller should have a
 420             // grammar rule allowing whitespace at end of entity.
 421             if (in.isEOF() && !in.isInternal())
 422                 return saw;
 423             c = getc();
 424         }
 425         ungetc();
 426         return saw;
 427     }
 428 
 429     private String maybeGetName()
 430             throws IOException, SAXException {
 431 
 432         NameCacheEntry entry = maybeGetNameCacheEntry();
 433         return (entry == null) ? null : entry.name;
 434     }
 435 
 436     private NameCacheEntry maybeGetNameCacheEntry()
 437             throws IOException, SAXException {
 438 
 439         // [5] Name ::= (Letter|'_'|':') (Namechar)*
 440         char c = getc();
 441 
 442         if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
 443             ungetc();
 444             return null;
 445         }
 446         return nameCharString(c);
 447     }
 448 
 449     // Used when parsing enumerations
 450     private String getNmtoken()
 451             throws IOException, SAXException {
 452 
 453         // [7] Nmtoken ::= (Namechar)+
 454         char c = getc();
 455         if (!XmlChars.isNameChar(c))
 456             fatal("P-006", new Object[]{new Character(c)});
 457         return nameCharString(c).name;
 458     }
 459 
 460     // n.b. this gets used when parsing attribute values (for
 461     // internal references) so we can't use strTmp; it's also
 462     // a hotspot for CPU and memory in the parser (called at least
 463     // once for each element) so this has been optimized a bit.
 464 
 465     private NameCacheEntry nameCharString(char c)
 466             throws IOException, SAXException {
 467 
 468         int i = 1;
 469 
 470         nameTmp[0] = c;
 471         for (; ;) {
 472             if ((c = in.getNameChar()) == 0)
 473                 break;
 474             if (i >= nameTmp.length) {
 475                 char tmp [] = new char[nameTmp.length + 10];
 476                 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
 477                 nameTmp = tmp;
 478             }
 479             nameTmp[i++] = c;
 480         }
 481         return nameCache.lookupEntry(nameTmp, i);
 482     }
 483 
 484     //
 485     // much similarity between parsing entity values in DTD
 486     // and attribute values (in DTD or content) ... both follow
 487     // literal parsing rules, newline canonicalization, etc
 488     //
 489     // leaves value in 'strTmp' ... either a "replacement text" (4.5),
 490     // or else partially normalized attribute value (the first bit
 491     // of 3.3.3's spec, without the "if not CDATA" bits).
 492     //
 493     private void parseLiteral(boolean isEntityValue)
 494             throws IOException, SAXException {
 495 
 496         // [9] EntityValue ::=
 497         //    '"' ([^"&%] | Reference | PEReference)* '"'
 498         //    |    "'" ([^'&%] | Reference | PEReference)* "'"
 499         // [10] AttValue ::=
 500         //    '"' ([^"&]  | Reference             )* '"'
 501         //    |    "'" ([^'&]  | Reference             )* "'"
 502         char quote = getc();
 503         char c;
 504         InputEntity source = in;
 505 
 506         if (quote != '\'' && quote != '"') {
 507             fatal("P-007");
 508         }
 509 
 510         // don't report entity expansions within attributes,
 511         // they're reported "fully expanded" via SAX
 512 //    isInAttribute = !isEntityValue;
 513 
 514         // get value into strTmp
 515         strTmp = new StringBuffer();
 516 
 517         // scan, allowing entity push/pop wherever ...
 518         // expanded entities can't terminate the literal!
 519         for (; ;) {
 520             if (in != source && in.isEOF()) {
 521                 // we don't report end of parsed entities
 522                 // within attributes (no SAX hooks)
 523                 in = in.pop();
 524                 continue;
 525             }
 526             if ((c = getc()) == quote && in == source) {
 527                 break;
 528             }
 529 
 530             //
 531             // Basically the "reference in attribute value"
 532             // row of the chart in section 4.4 of the spec
 533             //
 534             if (c == '&') {
 535                 String entityName = maybeGetName();
 536 
 537                 if (entityName != null) {
 538                     nextChar(';', "F-020", entityName);
 539 
 540                     // 4.4 says:  bypass these here ... we'll catch
 541                     // forbidden refs to unparsed entities on use
 542                     if (isEntityValue) {
 543                         strTmp.append('&');
 544                         strTmp.append(entityName);
 545                         strTmp.append(';');
 546                         continue;
 547                     }
 548                     expandEntityInLiteral(entityName, entities, isEntityValue);
 549 
 550 
 551                     // character references are always included immediately
 552                 } else if ((c = getc()) == '#') {
 553                     int tmp = parseCharNumber();
 554 
 555                     if (tmp > 0xffff) {
 556                         tmp = surrogatesToCharTmp(tmp);
 557                         strTmp.append(charTmp[0]);
 558                         if (tmp == 2)
 559                             strTmp.append(charTmp[1]);
 560                     } else
 561                         strTmp.append((char) tmp);
 562                 } else
 563                     fatal("P-009");
 564                 continue;
 565 
 566             }
 567 
 568             // expand parameter entities only within entity value literals
 569             if (c == '%' && isEntityValue) {
 570                 String entityName = maybeGetName();
 571 
 572                 if (entityName != null) {
 573                     nextChar(';', "F-021", entityName);
 574                     expandEntityInLiteral(entityName, params, isEntityValue);
 575                     continue;
 576                 } else
 577                     fatal("P-011");
 578             }
 579 
 580             // For attribute values ...
 581             if (!isEntityValue) {
 582                 // 3.3.3 says whitespace normalizes to space...
 583                 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
 584                     strTmp.append(' ');
 585                     continue;
 586                 }
 587 
 588                 // "<" not legal in parsed literals ...
 589                 if (c == '<')
 590                     fatal("P-012");
 591             }
 592 
 593             strTmp.append(c);
 594         }
 595 //    isInAttribute = false;
 596     }
 597 
 598     // does a SINGLE expansion of the entity (often reparsed later)
 599     private void expandEntityInLiteral(String name, SimpleHashtable table,
 600                                        boolean isEntityValue)
 601             throws IOException, SAXException {
 602 
 603         Object entity = table.get(name);
 604 
 605         if (entity instanceof InternalEntity) {
 606             InternalEntity value = (InternalEntity) entity;
 607             pushReader(value.buf, name, !value.isPE);
 608 
 609         } else if (entity instanceof ExternalEntity) {
 610             if (!isEntityValue)    // must be a PE ...
 611                 fatal("P-013", new Object[]{name});
 612             // XXX if this returns false ...
 613             pushReader((ExternalEntity) entity);
 614 
 615         } else if (entity == null) {
 616             //
 617             // Note:  much confusion about whether spec requires such
 618             // errors to be fatal in many cases, but none about whether
 619             // it allows "normal" errors to be unrecoverable!
 620             //
 621             fatal((table == params) ? "V-022" : "P-014",
 622                     new Object[]{name});
 623         }
 624     }
 625 
 626     // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
 627     // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
 628 
 629     // NOTE:  XML spec should explicitly say that PE ref syntax is
 630     // ignored in PIs, comments, SystemLiterals, and Pubid Literal
 631     // values ... can't process the XML spec's own DTD without doing
 632     // that for comments.
 633 
 634     private String getQuotedString(String type, String extra)
 635             throws IOException, SAXException {
 636 
 637         // use in.getc to bypass PE processing
 638         char quote = in.getc();
 639 
 640         if (quote != '\'' && quote != '"')
 641             fatal("P-015", new Object[]{
 642                 messages.getMessage(locale, type, new Object[]{extra})
 643             });
 644 
 645         char c;
 646 
 647         strTmp = new StringBuffer();
 648         while ((c = in.getc()) != quote)
 649             strTmp.append((char) c);
 650         return strTmp.toString();
 651     }
 652 
 653 
 654     private String parsePublicId() throws IOException, SAXException {
 655 
 656         // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
 657         // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
 658         String retval = getQuotedString("F-033", null);
 659         for (int i = 0; i < retval.length(); i++) {
 660             char c = retval.charAt(i);
 661             if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
 662                     && !(c >= 'A' && c <= 'Z')
 663                     && !(c >= 'a' && c <= 'z'))
 664                 fatal("P-016", new Object[]{new Character(c)});
 665         }
 666         strTmp = new StringBuffer();
 667         strTmp.append(retval);
 668         return normalize(false);
 669     }
 670 
 671     // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 672     // handled by:  InputEntity.parsedContent()
 673 
 674     private boolean maybeComment(boolean skipStart)
 675             throws IOException, SAXException {
 676 
 677         // [15] Comment ::= '<!--'
 678         //        ( (Char - '-') | ('-' (Char - '-'))*
 679         //        '-->'
 680         if (!in.peek(skipStart ? "!--" : "<!--", null))
 681             return false;
 682 
 683         boolean savedLexicalPE = doLexicalPE;
 684         boolean saveCommentText;
 685 
 686         doLexicalPE = false;
 687         saveCommentText = false;
 688         if (saveCommentText)
 689             strTmp = new StringBuffer();
 690 
 691         oneComment:
 692         for (; ;) {
 693             try {
 694                 // bypass PE expansion, but permit PEs
 695                 // to complete ... valid docs won't care.
 696                 for (; ;) {
 697                     int c = getc();
 698                     if (c == '-') {
 699                         c = getc();
 700                         if (c != '-') {
 701                             if (saveCommentText)
 702                                 strTmp.append('-');
 703                             ungetc();
 704                             continue;
 705                         }
 706                         nextChar('>', "F-022", null);
 707                         break oneComment;
 708                     }
 709                     if (saveCommentText)
 710                         strTmp.append((char) c);
 711                 }
 712             } catch (EndOfInputException e) {
 713                 //
 714                 // This is fatal EXCEPT when we're processing a PE...
 715                 // in which case a validating processor reports an error.
 716                 // External PEs are easy to detect; internal ones we
 717                 // infer by being an internal entity outside an element.
 718                 //
 719                 if (in.isInternal()) {
 720                     error("V-021", null);
 721                 }
 722                 fatal("P-017");
 723             }
 724         }
 725         doLexicalPE = savedLexicalPE;
 726         if (saveCommentText)
 727             dtdHandler.comment(strTmp.toString());
 728         return true;
 729     }
 730 
 731     private boolean maybePI(boolean skipStart)
 732             throws IOException, SAXException {
 733 
 734         // [16] PI ::= '<?' PITarget
 735         //        (S (Char* - (Char* '?>' Char*)))?
 736         //        '?>'
 737         // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
 738         boolean savedLexicalPE = doLexicalPE;
 739 
 740         if (!in.peek(skipStart ? "?" : "<?", null))
 741             return false;
 742         doLexicalPE = false;
 743 
 744         String target = maybeGetName();
 745 
 746         if (target == null) {
 747             fatal("P-018");
 748         }
 749         if ("xml".equals(target)) {
 750             fatal("P-019");
 751         }
 752         if ("xml".equalsIgnoreCase(target)) {
 753             fatal("P-020", new Object[]{target});
 754         }
 755 
 756         if (maybeWhitespace()) {
 757             strTmp = new StringBuffer();
 758             try {
 759                 for (; ;) {
 760                     // use in.getc to bypass PE processing
 761                     char c = in.getc();
 762                     //Reached the end of PI.
 763                     if (c == '?' && in.peekc('>'))
 764                         break;
 765                     strTmp.append(c);
 766                 }
 767             } catch (EndOfInputException e) {
 768                 fatal("P-021");
 769             }
 770             dtdHandler.processingInstruction(target, strTmp.toString());
 771         } else {
 772             if (!in.peek("?>", null)) {
 773                 fatal("P-022");
 774             }
 775             dtdHandler.processingInstruction(target, "");
 776         }
 777 
 778         doLexicalPE = savedLexicalPE;
 779         return true;
 780     }
 781 
 782     // [18] CDSect ::= CDStart CData CDEnd
 783     // [19] CDStart ::= '<![CDATA['
 784     // [20] CData ::= (Char* - (Char* ']]>' Char*))
 785     // [21] CDEnd ::= ']]>'
 786     //
 787     //    ... handled by InputEntity.unparsedContent()
 788 
 789     // collapsing several rules together ...
 790     // simpler than attribute literals -- no reference parsing!
 791     private String maybeReadAttribute(String name, boolean must)
 792             throws IOException, SAXException {
 793 
 794         // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
 795         // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
 796         // [32] SDDecl ::=  S 'standalone' Eq \'|\" ... \'|\"
 797         if (!maybeWhitespace()) {
 798             if (!must) {
 799                 return null;
 800             }
 801             fatal("P-024", new Object[]{name});
 802             // NOTREACHED
 803         }
 804 
 805         if (!peek(name)) {
 806             if (must) {
 807                 fatal("P-024", new Object[]{name});
 808             } else {
 809                 // To ensure that the whitespace is there so that when we
 810                 // check for the next attribute we assure that the
 811                 // whitespace still exists.
 812                 ungetc();
 813                 return null;
 814             }
 815         }
 816 
 817         // [25] Eq ::= S? '=' S?
 818         maybeWhitespace();
 819         nextChar('=', "F-023", null);
 820         maybeWhitespace();
 821 
 822         return getQuotedString("F-035", name);
 823     }
 824 
 825     private void readVersion(boolean must, String versionNum)
 826             throws IOException, SAXException {
 827 
 828         String value = maybeReadAttribute("version", must);
 829 
 830         // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
 831 
 832         if (must && value == null)
 833             fatal("P-025", new Object[]{versionNum});
 834         if (value != null) {
 835             int length = value.length();
 836             for (int i = 0; i < length; i++) {
 837                 char c = value.charAt(i);
 838                 if (!((c >= '0' && c <= '9')
 839                         || c == '_' || c == '.'
 840                         || (c >= 'a' && c <= 'z')
 841                         || (c >= 'A' && c <= 'Z')
 842                         || c == ':' || c == '-')
 843                 )
 844                     fatal("P-026", new Object[]{value});
 845             }
 846         }
 847         if (value != null && !value.equals(versionNum))
 848             error("P-027", new Object[]{versionNum, value});
 849     }
 850 
 851     // common code used by most markup declarations
 852     // ... S (Q)Name ...
 853     private String getMarkupDeclname(String roleId, boolean qname)
 854             throws IOException, SAXException {
 855 
 856         String name;
 857 
 858         whitespace(roleId);
 859         name = maybeGetName();
 860         if (name == null)
 861             fatal("P-005", new Object[]
 862             {messages.getMessage(locale, roleId)});
 863         return name;
 864     }
 865 
 866     private boolean maybeMarkupDecl()
 867             throws IOException, SAXException {
 868 
 869         // [29] markupdecl ::= elementdecl | Attlistdecl
 870         //           | EntityDecl | NotationDecl | PI | Comment
 871         return maybeElementDecl()
 872                 || maybeAttlistDecl()
 873                 || maybeEntityDecl()
 874                 || maybeNotationDecl()
 875                 || maybePI(false)
 876                 || maybeComment(false);
 877     }
 878 
    // Name of the attribute whose declared default value is checked
    // against the LanguageId grammar when attribute lists are parsed.
    private static final String XmlLang = "xml:lang";
 880 
 881     private boolean isXmlLang(String value) {
 882 
 883         // [33] LanguageId ::= Langcode ('-' Subcode)*
 884         // [34] Langcode ::= ISO639Code | IanaCode | UserCode
 885         // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
 886         // [36] IanaCode ::= [iI] '-' SubCode
 887         // [37] UserCode ::= [xX] '-' SubCode
 888         // [38] SubCode ::= [a-zA-Z]+
 889 
 890         // the ISO and IANA codes (and subcodes) are registered,
 891         // but that's neither a WF nor a validity constraint.
 892 
 893         int nextSuffix;
 894         char c;
 895 
 896         if (value.length() < 2)
 897             return false;
 898         c = value.charAt(1);
 899         if (c == '-') {        // IANA, or user, code
 900             c = value.charAt(0);
 901             if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
 902                 return false;
 903             nextSuffix = 1;
 904         } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
 905             // 2 letter ISO code, or error
 906             c = value.charAt(0);
 907             if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
 908                 return false;
 909             nextSuffix = 2;
 910         } else
 911             return false;
 912 
 913         // here "suffix" ::= '-' [a-zA-Z]+ suffix*
 914         while (nextSuffix < value.length()) {
 915             c = value.charAt(nextSuffix);
 916             if (c != '-')
 917                 break;
 918             while (++nextSuffix < value.length()) {
 919                 c = value.charAt(nextSuffix);
 920                 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
 921                     break;
 922             }
 923         }
 924         return value.length() == nextSuffix && c != '-';
 925     }
 926 
 927 
 928     //
 929     // CHAPTER 3:  Logical Structures
 930     //
 931 
 932     /**
 933      * To validate, subclassers should at this time make sure that
 934      * values are of the declared types:<UL>
 935      * <LI> ID and IDREF(S) values are Names
 936      * <LI> NMTOKEN(S) are Nmtokens
 937      * <LI> ENUMERATION values match one of the tokens
 938      * <LI> NOTATION values match a notation name
     * <LI> ENTITY(IES) values match an unparsed external entity
 940      * </UL>
 941      * <p/>
 942      * <P> Separately, make sure IDREF values match some ID
 943      * provided in the document (in the afterRoot method).
 944      */
 945 /*    void validateAttributeSyntax (Attribute attr, String value)
 946          throws DTDParseException {
 947         // ID, IDREF(S) ... values are Names
 948         if (Attribute.ID == attr.type()) {
 949             if (!XmlNames.isName (value))
 950                 error ("V-025", new Object [] { value });
 951 
 952             Boolean             b = (Boolean) ids.getNonInterned (value);
 953             if (b == null || b.equals (Boolean.FALSE))
 954                 ids.put (value.intern (), Boolean.TRUE);
 955             else
 956                 error ("V-026", new Object [] { value });
 957 
 958         } else if (Attribute.IDREF == attr.type()) {
 959             if (!XmlNames.isName (value))
 960                 error ("V-027", new Object [] { value });
 961 
 962             Boolean             b = (Boolean) ids.getNonInterned (value);
 963             if (b == null)
 964                 ids.put (value.intern (), Boolean.FALSE);
 965 
 966         } else if (Attribute.IDREFS == attr.type()) {
 967             StringTokenizer     tokenizer = new StringTokenizer (value);
 968             Boolean             b;
 969             boolean             sawValue = false;
 970 
 971             while (tokenizer.hasMoreTokens ()) {
 972                 value = tokenizer.nextToken ();
 973                 if (!XmlNames.isName (value))
 974                     error ("V-027", new Object [] { value });
 975                 b = (Boolean) ids.getNonInterned (value);
 976                 if (b == null)
 977                     ids.put (value.intern (), Boolean.FALSE);
 978                 sawValue = true;
 979             }
 980             if (!sawValue)
 981                 error ("V-039", null);
 982 
 983 
 984         // NMTOKEN(S) ... values are Nmtoken(s)
 985         } else if (Attribute.NMTOKEN == attr.type()) {
 986             if (!XmlNames.isNmtoken (value))
 987                 error ("V-028", new Object [] { value });
 988 
 989         } else if (Attribute.NMTOKENS == attr.type()) {
 990             StringTokenizer     tokenizer = new StringTokenizer (value);
 991             boolean             sawValue = false;
 992 
 993             while (tokenizer.hasMoreTokens ()) {
 994                 value = tokenizer.nextToken ();
 995                 if (!XmlNames.isNmtoken (value))
 996                     error ("V-028", new Object [] { value });
 997                 sawValue = true;
 998             }
 999             if (!sawValue)
1000                 error ("V-032", null);
1001 
1002         // ENUMERATION ... values match one of the tokens
1003         } else if (Attribute.ENUMERATION == attr.type()) {
1004             for (int i = 0; i < attr.values().length; i++)
1005                 if (value.equals (attr.values()[i]))
1006                     return;
1007             error ("V-029", new Object [] { value });
1008 
1009         // NOTATION values match a notation name
1010         } else if (Attribute.NOTATION == attr.type()) {
1011             //
1012             // XXX XML 1.0 spec should probably list references to
1013             // externally defined notations in standalone docs as
1014             // validity errors.  Ditto externally defined unparsed
1015             // entities; neither should show up in attributes, else
1016             // one needs to read the external declarations in order
1017             // to make sense of the document (exactly what tagging
1018             // a doc as "standalone" intends you won't need to do).
1019             //
1020             for (int i = 0; i < attr.values().length; i++)
1021                 if (value.equals (attr.values()[i]))
1022                     return;
1023             error ("V-030", new Object [] { value });
1024 
1025         // ENTITY(IES) values match an unparsed entity(ies)
1026         } else if (Attribute.ENTITY == attr.type()) {
1027             // see note above re standalone
1028             if (!isUnparsedEntity (value))
1029                 error ("V-031", new Object [] { value });
1030 
1031         } else if (Attribute.ENTITIES == attr.type()) {
1032             StringTokenizer     tokenizer = new StringTokenizer (value);
1033             boolean             sawValue = false;
1034 
1035             while (tokenizer.hasMoreTokens ()) {
1036                 value = tokenizer.nextToken ();
1037                 // see note above re standalone
1038                 if (!isUnparsedEntity (value))
1039                     error ("V-031", new Object [] { value });
1040                 sawValue = true;
1041             }
1042             if (!sawValue)
1043                 error ("V-040", null);
1044 
1045         } else if (Attribute.CDATA != attr.type())
1046             throw new InternalError (attr.type());
1047     }
1048 */
1049 /*
1050     private boolean isUnparsedEntity (String name)
1051     {
1052         Object e = entities.getNonInterned (name);
1053         if (e == null || !(e instanceof ExternalEntity))
1054             return false;
1055         return ((ExternalEntity)e).notation != null;
1056     }
1057 */
1058     private boolean maybeElementDecl()
1059             throws IOException, SAXException {
1060 
1061         // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1062         // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1063         InputEntity start = peekDeclaration("!ELEMENT");
1064 
1065         if (start == null)
1066             return false;
1067 
1068         // n.b. for content models where inter-element whitespace is
1069         // ignorable, we mark that fact here.
1070         String name = getMarkupDeclname("F-015", true);
1071 //    Element        element = (Element) elements.get (name);
1072 //    boolean        declEffective = false;
1073 
1074 /*
1075     if (element != null) {
1076         if (element.contentModel() != null) {
1077             error ("V-012", new Object [] { name });
1078         } // else <!ATTLIST name ...> came first
1079     } else {
1080         element = new Element(name);
1081         elements.put (element.name(), element);
1082         declEffective = true;
1083     }
1084 */
1085         if (declaredElements.contains(name))
1086             error("V-012", new Object[]{name});
1087         else {
1088             declaredElements.add(name);
1089 //        declEffective = true;
1090         }
1091 
1092         short modelType;
1093         whitespace("F-000");
1094         if (peek(strEMPTY)) {
1095 ///        // leave element.contentModel as null for this case.
1096             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
1097         } else if (peek(strANY)) {
1098 ///        element.setContentModel(new StringModel(StringModelType.ANY));
1099             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
1100         } else {
1101             modelType = getMixedOrChildren(name);
1102         }
1103 
1104         dtdHandler.endContentModel(name, modelType);
1105 
1106         maybeWhitespace();
1107         char c = getc();
1108         if (c != '>')
1109             fatal("P-036", new Object[]{name, new Character(c)});
1110         if (start != in)
1111             error("V-013", null);
1112 
1113 ///        dtdHandler.elementDecl(element);
1114 
1115         return true;
1116     }
1117 
1118     // We're leaving the content model as a regular expression;
1119     // it's an efficient natural way to express such things, and
1120     // libraries often interpret them.  No whitespace in the
1121     // model we store, though!
1122 
1123     /**
1124      * returns content model type.
1125      */
1126     private short getMixedOrChildren(String elementName/*Element element*/)
1127             throws IOException, SAXException {
1128 
1129         InputEntity start;
1130 
1131         // [47] children ::= (choice|seq) ('?'|'*'|'+')?
1132         strTmp = new StringBuffer();
1133 
1134         nextChar('(', "F-028", elementName);
1135         start = in;
1136         maybeWhitespace();
1137         strTmp.append('(');
1138 
1139         short modelType;
1140         if (peek("#PCDATA")) {
1141             strTmp.append("#PCDATA");
1142             dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);
1143             getMixed(elementName, start);
1144         } else {
1145             dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
1146             getcps(elementName, start);
1147         }
1148 
1149         return modelType;
1150     }
1151 
1152     // '(' S? already consumed
1153     // matching ')' must be in "start" entity if validating
    /**
     * Parses the content particles of one <em>choice</em> or <em>seq</em>
     * group, up to and including its closing parenthesis and optional
     * occurrence indicator, reporting them to the DTD handler.
     * <p/>
     * The caller has already consumed the opening <code>'('</code> and any
     * whitespace after it.  The handler sees
     * <code>startModelGroup()</code>, then one <code>childElement()</code>
     * per name (nested groups recurse into this method), a
     * <code>connector()</code> per separator, and finally
     * <code>endModelGroup()</code> carrying the group's occurrence.
     * The text of the group is appended to <code>strTmp</code>.
     *
     * @param elementName name of the element being declared, used only in
     *                    the "V-014" diagnostic
     * @param start       input entity that was current when the group was
     *                    opened; "V-014" is reported if the group is closed
     *                    while a different entity is current
     */
    private void getcps(/*Element element,*/String elementName, InputEntity start)
            throws IOException, SAXException {

        // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
        // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
        // [50] seq    ::= '(' S? cp (S? ',' S? cp)* S? ')'

        // "decided" becomes true once the first '|' or ',' has been seen;
        // from then on every separator in this group must equal "type".
        boolean decided = false;
        // last character read at a separator position; 0 until one is read
        char type = 0;
//        ContentModel       retval, temp, current;

//        retval = temp = current = null;

        dtdHandler.startModelGroup();

        // Note: each "continue" below jumps to the loop condition, i.e. to
        // the peek(")") test that ends the group.
        do {
            String tag;

            tag = maybeGetName();
            if (tag != null) {
                // plain element name, followed by an optional ?, + or *
                strTmp.append(tag);
//                temp = new ElementModel(tag);
//                getFrequency((RepeatableContent)temp);
///->
                dtdHandler.childElement(tag, getFrequency());
///<-
            } else if (peek("(")) {
                // nested group: remember the entity it was opened in and
                // recurse
                InputEntity next = in;
                strTmp.append('(');
                maybeWhitespace();
//                temp = getcps(element, next);
//                getFrequency(temp);
///->
                getcps(elementName, next);
                // No getFrequency() call here: the recursive call reads the
                // nested group's occurrence itself when it invokes
                // endModelGroup(getFrequency()).
///<-
            } else
                // neither a name nor a group; the message id depends on the
                // separator seen so far
                fatal((type == 0) ? "P-039" :
                        ((type == ',') ? "P-037" : "P-038"),
                        new Object[]{new Character(getc())});

            maybeWhitespace();
            if (decided) {
                char c = getc();

//                if (current != null) {
//                    current.addChild(temp);
//                }
                if (c == type) {
                    // same separator as before: another particle follows
                    strTmp.append(type);
                    maybeWhitespace();
                    reportConnector(type);
                    continue;
                } else if (c == '\u0029') {    // rparen
                    // push the ')' back so the loop condition consumes it
                    ungetc();
                    continue;
                } else {
                    // a different separator, or some other character
                    fatal((type == 0) ? "P-041" : "P-040",
                            new Object[]{
                                new Character(c),
                                new Character(type)
                            });
                }
            } else {
                // first position where a separator may appear
                type = getc();
                switch (type) {
                case '|':
                case ',':
                    reportConnector(type);
                    break;
                default:
//                        retval = temp;
                    // not a separator: push it back and let the loop
                    // condition decide whether the group ends here
                    ungetc();
                    continue;
                }
//                retval = (ContentModel)current;
                decided = true;
//                current.addChild(temp);
                strTmp.append(type);
            }
            maybeWhitespace();
        } while (!peek(")"));

        // the ')' has to be read from the entity in which '(' was read
        if (in != start)
            error("V-014", new Object[]{elementName});
        strTmp.append(')');

        // optional ?, + or * after the closing parenthesis
        dtdHandler.endModelGroup(getFrequency());
//        return retval;
    }
1243 
1244     private void reportConnector(char type) throws SAXException {
1245         switch (type) {
1246         case '|':
1247             dtdHandler.connector(DTDEventListener.CHOICE);    ///<-
1248             return;
1249         case ',':
1250             dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
1251             return;
1252         default:
1253             throw new Error();    //assertion failed.
1254         }
1255     }
1256 
1257     private short getFrequency()
1258             throws IOException, SAXException {
1259 
1260         final char c = getc();
1261 
1262         if (c == '?') {
1263             strTmp.append(c);
1264             return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
1265             //        original.setRepeat(Repeat.ZERO_OR_ONE);
1266         } else if (c == '+') {
1267             strTmp.append(c);
1268             return DTDEventListener.OCCURENCE_ONE_OR_MORE;
1269             //        original.setRepeat(Repeat.ONE_OR_MORE);
1270         } else if (c == '*') {
1271             strTmp.append(c);
1272             return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
1273             //        original.setRepeat(Repeat.ZERO_OR_MORE);
1274         } else {
1275             ungetc();
1276             return DTDEventListener.OCCURENCE_ONCE;
1277         }
1278     }
1279 
1280     // '(' S? '#PCDATA' already consumed
1281     // matching ')' must be in "start" entity if validating
1282     private void getMixed(String elementName, /*Element element,*/ InputEntity start)
1283             throws IOException, SAXException {
1284 
1285         // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
1286         //        | '(' S? '#PCDATA'                   S? ')'
1287         maybeWhitespace();
1288         if (peek("\u0029*") || peek("\u0029")) {
1289             if (in != start)
1290                 error("V-014", new Object[]{elementName});
1291             strTmp.append(')');
1292 //            element.setContentModel(new StringModel(StringModelType.PCDATA));
1293             return;
1294         }
1295 
1296         ArrayList l = new ArrayList();
1297 //    l.add(new StringModel(StringModelType.PCDATA));
1298 
1299 
1300         while (peek("|")) {
1301             String name;
1302 
1303             strTmp.append('|');
1304             maybeWhitespace();
1305 
1306             doLexicalPE = true;
1307             name = maybeGetName();
1308             if (name == null)
1309                 fatal("P-042", new Object[]
1310                 {elementName, Integer.toHexString(getc())});
1311             if (l.contains(name)) {
1312                 error("V-015", new Object[]{name});
1313             } else {
1314                 l.add(name);
1315                 dtdHandler.mixedElement(name);
1316             }
1317             strTmp.append(name);
1318             maybeWhitespace();
1319         }
1320 
1321         if (!peek("\u0029*"))    // right paren
1322             fatal("P-043", new Object[]
1323             {elementName, new Character(getc())});
1324         if (in != start)
1325             error("V-014", new Object[]{elementName});
1326         strTmp.append(')');
1327 //        ChoiceModel cm = new ChoiceModel((Collection)l);
1328 //    cm.setRepeat(Repeat.ZERO_OR_MORE);
1329 //       element.setContentModel(cm);
1330     }
1331 
1332     private boolean maybeAttlistDecl()
1333             throws IOException, SAXException {
1334 
1335         // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1336         InputEntity start = peekDeclaration("!ATTLIST");
1337 
1338         if (start == null)
1339             return false;
1340 
1341         String elementName = getMarkupDeclname("F-016", true);
1342 //    Element    element = (Element) elements.get (name);
1343 
1344 //    if (element == null) {
1345 //        // not yet declared -- no problem.
1346 //        element = new Element(name);
1347 //        elements.put(name, element);
1348 //    }
1349 
1350         while (!peek(">")) {
1351 
1352             // [53] AttDef ::= S Name S AttType S DefaultDecl
1353             // [54] AttType ::= StringType | TokenizedType | EnumeratedType
1354 
1355             // look for global attribute definitions, don't expand for now...
1356             maybeWhitespace();
1357             char c = getc();
1358             if (c == '%') {
1359                 String entityName = maybeGetName();
1360                 if (entityName != null) {
1361                     nextChar(';', "F-021", entityName);
1362                     whitespace("F-021");
1363                     continue;
1364                 } else
1365                     fatal("P-011");
1366             }
1367 
1368             ungetc();
1369             // look for attribute name otherwise
1370             String attName = maybeGetName();
1371             if (attName == null) {
1372                 fatal("P-044", new Object[]{new Character(getc())});
1373             }
1374             whitespace("F-001");
1375 
1376 ///        Attribute    a = new Attribute (name);
1377 
1378             String typeName;
1379             Vector values = null;    // notation/enumeration values
1380 
1381             // Note:  use the type constants from Attribute
1382             // so that "==" may be used (faster)
1383 
1384             // [55] StringType ::= 'CDATA'
1385             if (peek(TYPE_CDATA))
1386 ///            a.setType(Attribute.CDATA);
1387                 typeName = TYPE_CDATA;
1388 
1389             // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
1390             //        | 'ENTITY' | 'ENTITIES'
1391             //        | 'NMTOKEN' | 'NMTOKENS'
1392             // n.b. if "IDREFS" is there, both "ID" and "IDREF"
1393             // match peekahead ... so this order matters!
1394             else if (peek(TYPE_IDREFS))
1395                 typeName = TYPE_IDREFS;
1396             else if (peek(TYPE_IDREF))
1397                 typeName = TYPE_IDREF;
1398             else if (peek(TYPE_ID)) {
1399                 typeName = TYPE_ID;
1400 // TODO: should implement this error check?
1401 ///        if (element.id() != null) {
1402 ///                    error ("V-016", new Object [] { element.id() });
1403 ///        } else
1404 ///            element.setId(name);
1405             } else if (peek(TYPE_ENTITY))
1406                 typeName = TYPE_ENTITY;
1407             else if (peek(TYPE_ENTITIES))
1408                 typeName = TYPE_ENTITIES;
1409             else if (peek(TYPE_NMTOKENS))
1410                 typeName = TYPE_NMTOKENS;
1411             else if (peek(TYPE_NMTOKEN))
1412                 typeName = TYPE_NMTOKEN;
1413 
1414             // [57] EnumeratedType ::= NotationType | Enumeration
1415             // [58] NotationType ::= 'NOTATION' S '(' S? Name
1416             //        (S? '|' S? Name)* S? ')'
1417             else if (peek(TYPE_NOTATION)) {
1418                 typeName = TYPE_NOTATION;
1419                 whitespace("F-002");
1420                 nextChar('(', "F-029", null);
1421                 maybeWhitespace();
1422 
1423                 values = new Vector();
1424                 do {
1425                     String name;
1426                     if ((name = maybeGetName()) == null)
1427                         fatal("P-068");
1428                     // permit deferred declarations
1429                     if (notations.get(name) == null)
1430                         notations.put(name, name);
1431                     values.addElement(name);
1432                     maybeWhitespace();
1433                     if (peek("|"))
1434                         maybeWhitespace();
1435                 } while (!peek(")"));
1436 ///            a.setValues(new String [v.size ()]);
1437 ///            for (int i = 0; i < v.size (); i++)
1438 ///                a.setValue(i, (String)v.elementAt(i));
1439 
1440                 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
1441             } else if (peek("(")) {
1442 ///            a.setType(Attribute.ENUMERATION);
1443                 typeName = TYPE_ENUMERATION;
1444 
1445                 maybeWhitespace();
1446 
1447 ///            Vector v = new Vector ();
1448                 values = new Vector();
1449                 do {
1450                     String name = getNmtoken();
1451 ///                v.addElement (name);
1452                     values.addElement(name);
1453                     maybeWhitespace();
1454                     if (peek("|"))
1455                         maybeWhitespace();
1456                 } while (!peek(")"));
1457 ///            a.setValues(new String [v.size ()]);
1458 ///            for (int i = 0; i < v.size (); i++)
1459 ///                a.setValue(i, (String)v.elementAt(i));
1460             } else {
1461                 fatal("P-045",
1462                         new Object[]{attName, new Character(getc())});
1463                 typeName = null;
1464             }
1465 
1466             short attributeUse;
1467             String defaultValue = null;
1468 
1469             // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1470             //        | (('#FIXED' S)? AttValue)
1471             whitespace("F-003");
1472             if (peek("#REQUIRED"))
1473                 attributeUse = DTDEventListener.USE_REQUIRED;
1474 ///            a.setIsRequired(true);
1475             else if (peek("#FIXED")) {
1476 ///            if (a.type() == Attribute.ID)
1477                 if (typeName == TYPE_ID)
1478                     error("V-017", new Object[]{attName});
1479 ///            a.setIsFixed(true);
1480                 attributeUse = DTDEventListener.USE_FIXED;
1481                 whitespace("F-004");
1482                 parseLiteral(false);
1483 ///            if (a.type() != Attribute.CDATA)
1484 ///                a.setDefaultValue(normalize(false));
1485 ///            else
1486 ///                a.setDefaultValue(strTmp.toString());
1487 
1488                 if (typeName == TYPE_CDATA)
1489                     defaultValue = normalize(false);
1490                 else
1491                     defaultValue = strTmp.toString();
1492 
1493 // TODO: implement this check
1494 ///            if (a.type() != Attribute.CDATA)
1495 ///                validateAttributeSyntax (a, a.defaultValue());
1496             } else if (!peek("#IMPLIED")) {
1497                 attributeUse = DTDEventListener.USE_IMPLIED;
1498 
1499 ///            if (a.type() == Attribute.ID)
1500                 if (typeName == TYPE_ID)
1501                     error("V-018", new Object[]{attName});
1502                 parseLiteral(false);
1503 ///            if (a.type() != Attribute.CDATA)
1504 ///                a.setDefaultValue(normalize(false));
1505 ///            else
1506 ///                a.setDefaultValue(strTmp.toString());
1507                 if (typeName == TYPE_CDATA)
1508                     defaultValue = normalize(false);
1509                 else
1510                     defaultValue = strTmp.toString();
1511 
1512 // TODO: implement this check
1513 ///            if (a.type() != Attribute.CDATA)
1514 ///                validateAttributeSyntax (a, a.defaultValue());
1515             } else {
1516                 // TODO: this looks like an fatal error.
1517                 attributeUse = DTDEventListener.USE_NORMAL;
1518             }
1519 
1520             if (XmlLang.equals(attName)
1521                     && defaultValue/* a.defaultValue()*/ != null
1522                     && !isXmlLang(defaultValue/*a.defaultValue()*/))
1523                 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});
1524 
1525 // TODO: isn't it an error to specify the same attribute twice?
1526 ///        if (!element.attributes().contains(a)) {
1527 ///            element.addAttribute(a);
1528 ///            dtdHandler.attributeDecl(a);
1529 ///        }
1530 
1531             String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null;
1532             dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
1533             maybeWhitespace();
1534         }
1535         if (start != in)
1536             error("V-013", null);
1537         return true;
1538     }
1539 
1540     // used when parsing literal attribute values,
1541     // or public identifiers.
1542     //
1543     // input in strTmp
1544     private String normalize(boolean invalidIfNeeded) {
1545 
1546         // this can allocate an extra string...
1547 
1548         String s = strTmp.toString();
1549         String s2 = s.trim();
1550         boolean didStrip = false;
1551 
1552         if (s != s2) {
1553             s = s2;
1554             s2 = null;
1555             didStrip = true;
1556         }
1557         strTmp = new StringBuffer();
1558         for (int i = 0; i < s.length(); i++) {
1559             char c = s.charAt(i);
1560             if (!XmlChars.isSpace(c)) {
1561                 strTmp.append(c);
1562                 continue;
1563             }
1564             strTmp.append(' ');
1565             while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
1566                 didStrip = true;
1567             i--;
1568         }
1569         if (didStrip)
1570             return strTmp.toString();
1571         else
1572             return s;
1573     }
1574 
1575     private boolean maybeConditionalSect()
1576             throws IOException, SAXException {
1577 
1578         // [61] conditionalSect ::= includeSect | ignoreSect
1579 
1580         if (!peek("<!["))
1581             return false;
1582 
1583         String keyword;
1584         InputEntity start = in;
1585 
1586         maybeWhitespace();
1587 
1588         if ((keyword = maybeGetName()) == null)
1589             fatal("P-046");
1590         maybeWhitespace();
1591         nextChar('[', "F-030", null);
1592 
1593         // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
1594         //                extSubsetDecl ']]>'
1595         if ("INCLUDE".equals(keyword)) {
1596             for (; ;) {
1597                 while (in.isEOF() && in != start)
1598                     in = in.pop();
1599                 if (in.isEOF()) {
1600                     error("V-020", null);
1601                 }
1602                 if (peek("]]>"))
1603                     break;
1604 
1605                 doLexicalPE = false;
1606                 if (maybeWhitespace())
1607                     continue;
1608                 if (maybePEReference())
1609                     continue;
1610                 doLexicalPE = true;
1611                 if (maybeMarkupDecl() || maybeConditionalSect())
1612                     continue;
1613 
1614                 fatal("P-047");
1615             }
1616 
1617             // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
1618             //            ignoreSectcontents ']]>'
1619             // [64] ignoreSectcontents ::= Ignore ('<!['
1620             //            ignoreSectcontents ']]>' Ignore)*
1621             // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
1622         } else if ("IGNORE".equals(keyword)) {
1623             int nestlevel = 1;
1624             // ignoreSectcontents
1625             doLexicalPE = false;
1626             while (nestlevel > 0) {
1627                 char c = getc();    // will pop input entities
1628                 if (c == '<') {
1629                     if (peek("!["))
1630                         nestlevel++;
1631                 } else if (c == ']') {
1632                     if (peek("]>"))
1633                         nestlevel--;
1634                 } else
1635                     continue;
1636             }
1637         } else
1638             fatal("P-048", new Object[]{keyword});
1639         return true;
1640     }
1641 
1642 
1643     //
1644     // CHAPTER 4:  Physical Structures
1645     //
1646 
1647     // parse decimal or hex numeric character reference
1648     private int parseCharNumber()
1649             throws IOException, SAXException {
1650 
1651         char c;
1652         int retval = 0;
1653 
1654         // n.b. we ignore overflow ...
1655         if (getc() != 'x') {
1656             ungetc();
1657             for (; ;) {
1658                 c = getc();
1659                 if (c >= '0' && c <= '9') {
1660                     retval *= 10;
1661                     retval += (c - '0');
1662                     continue;
1663                 }
1664                 if (c == ';')
1665                     return retval;
1666                 fatal("P-049");
1667             }
1668         } else
1669             for (; ;) {
1670                 c = getc();
1671                 if (c >= '0' && c <= '9') {
1672                     retval <<= 4;
1673                     retval += (c - '0');
1674                     continue;
1675                 }
1676                 if (c >= 'a' && c <= 'f') {
1677                     retval <<= 4;
1678                     retval += 10 + (c - 'a');
1679                     continue;
1680                 }
1681                 if (c >= 'A' && c <= 'F') {
1682                     retval <<= 4;
1683                     retval += 10 + (c - 'A');
1684                     continue;
1685                 }
1686                 if (c == ';')
1687                     return retval;
1688                 fatal("P-050");
1689             }
1690     }
1691 
1692     // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
1693     // though still subject to the 'Char' construct in XML
1694     private int surrogatesToCharTmp(int ucs4)
1695             throws SAXException {
1696 
1697         if (ucs4 <= 0xffff) {
1698             if (XmlChars.isChar(ucs4)) {
1699                 charTmp[0] = (char) ucs4;
1700                 return 1;
1701             }
1702         } else if (ucs4 <= 0x0010ffff) {
1703             // we represent these as UNICODE surrogate pairs
1704             ucs4 -= 0x10000;
1705             charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
1706             charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
1707             return 2;
1708         }
1709         fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
1710         // NOTREACHED
1711         return -1;
1712     }
1713 
1714     private boolean maybePEReference()
1715             throws IOException, SAXException {
1716 
1717         // This is the SYNTACTIC version of this construct.
1718         // When processing external entities, there is also
1719         // a LEXICAL version; see getc() and doLexicalPE.
1720 
1721         // [69] PEReference ::= '%' Name ';'
1722         if (!in.peekc('%'))
1723             return false;
1724 
1725         String name = maybeGetName();
1726         Object entity;
1727 
1728         if (name == null)
1729             fatal("P-011");
1730         nextChar(';', "F-021", name);
1731         entity = params.get(name);
1732 
1733         if (entity instanceof InternalEntity) {
1734             InternalEntity value = (InternalEntity) entity;
1735             pushReader(value.buf, name, false);
1736 
1737         } else if (entity instanceof ExternalEntity) {
1738             pushReader((ExternalEntity) entity);
1739             externalParameterEntity((ExternalEntity) entity);
1740 
1741         } else if (entity == null) {
1742             error("V-022", new Object[]{name});
1743         }
1744         return true;
1745     }
1746 
1747     private boolean maybeEntityDecl()
1748             throws IOException, SAXException {
1749 
1750         // [70] EntityDecl ::= GEDecl | PEDecl
1751         // [71] GEDecl ::= '<!ENTITY' S       Name S EntityDef S? '>'
1752         // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF     S? '>'
1753         // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1754         // [74] PEDef     ::= EntityValue |  ExternalID
1755         //
1756         InputEntity start = peekDeclaration("!ENTITY");
1757 
1758         if (start == null)
1759             return false;
1760 
1761         String entityName;
1762         SimpleHashtable defns;
1763         ExternalEntity externalId;
1764         boolean doStore;
1765 
1766         // PE expansion gets selectively turned off several places:
1767         // in ENTITY declarations (here), in comments, in PIs.
1768 
1769         // Here, we allow PE entities to be declared, and allows
1770         // literals to include PE refs without the added spaces
1771         // required with their expansion in markup decls.
1772 
1773         doLexicalPE = false;
1774         whitespace("F-005");
1775         if (in.peekc('%')) {
1776             whitespace("F-006");
1777             defns = params;
1778         } else
1779             defns = entities;
1780 
1781         ungetc();    // leave some whitespace
1782         doLexicalPE = true;
1783         entityName = getMarkupDeclname("F-017", false);
1784         whitespace("F-007");
1785         externalId = maybeExternalID();
1786 
1787         //
1788         // first definition sticks ... e.g. internal subset PEs are used
1789         // to override DTD defaults.  It's also an "error" to incorrectly
1790         // redefine builtin internal entities, but since reporting such
1791         // errors is optional we only give warnings ("just in case") for
1792         // non-parameter entities.
1793         //
1794         doStore = (defns.get(entityName) == null);
1795         if (!doStore && defns == entities)
1796             warning("P-054", new Object[]{entityName});
1797 
1798         // internal entities
1799         if (externalId == null) {
1800             char value [];
1801             InternalEntity entity;
1802 
1803             doLexicalPE = false;        // "ab%bar;cd" -maybe-> "abcd"
1804             parseLiteral(true);
1805             doLexicalPE = true;
1806             if (doStore) {
1807                 value = new char[strTmp.length()];
1808                 if (value.length != 0)
1809                     strTmp.getChars(0, value.length, value, 0);
1810                 entity = new InternalEntity(entityName, value);
1811                 entity.isPE = (defns == params);
1812                 entity.isFromInternalSubset = false;
1813                 defns.put(entityName, entity);
1814                 if (defns == entities)
1815                     dtdHandler.internalGeneralEntityDecl(entityName,
1816                             new String(value));
1817             }
1818 
1819             // external entities (including unparsed)
1820         } else {
1821             // [76] NDataDecl ::= S 'NDATA' S Name
1822             if (defns == entities && maybeWhitespace()
1823                     && peek("NDATA")) {
1824                 externalId.notation = getMarkupDeclname("F-018", false);
1825 
1826                 // flag undeclared notation for checking after
1827                 // the DTD is fully processed
1828                 if (notations.get(externalId.notation) == null)
1829                     notations.put(externalId.notation, Boolean.TRUE);
1830             }
1831             externalId.name = entityName;
1832             externalId.isPE = (defns == params);
1833             externalId.isFromInternalSubset = false;
1834             if (doStore) {
1835                 defns.put(entityName, externalId);
1836                 if (externalId.notation != null)
1837                     dtdHandler.unparsedEntityDecl(entityName,
1838                             externalId.publicId, externalId.systemId,
1839                             externalId.notation);
1840                 else if (defns == entities)
1841                     dtdHandler.externalGeneralEntityDecl(entityName,
1842                             externalId.publicId, externalId.systemId);
1843             }
1844         }
1845         maybeWhitespace();
1846         nextChar('>', "F-031", entityName);
1847         if (start != in)
1848             error("V-013", null);
1849         return true;
1850     }
1851 
1852     private ExternalEntity maybeExternalID()
1853             throws IOException, SAXException {
1854 
1855         // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1856         //        | 'PUBLIC' S' PubidLiteral S Systemliteral
1857         String temp = null;
1858         ExternalEntity retval;
1859 
1860         if (peek("PUBLIC")) {
1861             whitespace("F-009");
1862             temp = parsePublicId();
1863         } else if (!peek("SYSTEM"))
1864             return null;
1865 
1866         retval = new ExternalEntity(in);
1867         retval.publicId = temp;
1868         whitespace("F-008");
1869         retval.systemId = parseSystemId();
1870         return retval;
1871     }
1872 
1873     private String parseSystemId()
1874             throws IOException, SAXException {
1875 
1876         String uri = getQuotedString("F-034", null);
1877         int temp = uri.indexOf(':');
1878 
1879         // resolve relative URIs ... must do it here since
1880         // it's relative to the source file holding the URI!
1881 
1882         // "new java.net.URL (URL, string)" conforms to RFC 1630,
1883         // but we can't use that except when the URI is a URL.
1884         // The entity resolver is allowed to handle URIs that are
1885         // not URLs, so we pass URIs through with scheme intact
1886         if (temp == -1 || uri.indexOf('/') < temp) {
1887             String baseURI;
1888 
1889             baseURI = in.getSystemId();
1890             if (baseURI == null)
1891                 fatal("P-055", new Object[]{uri});
1892             if (uri.length() == 0)
1893                 uri = ".";
1894             baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
1895             if (uri.charAt(0) != '/')
1896                 uri = baseURI + uri;
1897             else {
1898                 // XXX slashes at the beginning of a relative URI are
1899                 // a special case we don't handle.
1900                 throw new InternalError();
1901             }
1902 
1903             // letting other code map any "/xxx/../" or "/./" to "/",
1904             // since all URIs must handle it the same.
1905         }
1906         // check for fragment ID in URI
1907         if (uri.indexOf('#') != -1)
1908             error("P-056", new Object[]{uri});
1909         return uri;
1910     }
1911 
1912     private void maybeTextDecl()
1913             throws IOException, SAXException {
1914 
1915         // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1916         if (peek("<?xml")) {
1917             readVersion(false, "1.0");
1918             readEncoding(true);
1919             maybeWhitespace();
1920             if (!peek("?>"))
1921                 fatal("P-057");
1922         }
1923     }
1924 
1925     private void externalParameterEntity(ExternalEntity next)
1926             throws IOException, SAXException {
1927 
1928         //
1929         // Reap the intended benefits of standalone declarations:
1930         // don't deal with external parameter entities, except to
1931         // validate the standalone declaration.
1932         //
1933 
1934         // n.b. "in external parameter entities" (and external
1935         // DTD subset, same grammar) parameter references can
1936         // occur "within" markup declarations ... expansions can
1937         // cross syntax rules.  Flagged here; affects getc().
1938 
1939         // [79] ExtPE ::= TextDecl? extSubsetDecl
1940         // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
1941         //        | PEReference | S )*
1942         InputEntity pe;
1943 
1944         // XXX if this returns false ...
1945 
1946         pe = in;
1947         maybeTextDecl();
1948         while (!pe.isEOF()) {
1949             // pop internal PEs (and whitespace before/after)
1950             if (in.isEOF()) {
1951                 in = in.pop();
1952                 continue;
1953             }
1954             doLexicalPE = false;
1955             if (maybeWhitespace())
1956                 continue;
1957             if (maybePEReference())
1958                 continue;
1959             doLexicalPE = true;
1960             if (maybeMarkupDecl() || maybeConditionalSect())
1961                 continue;
1962             break;
1963         }
1964         // if (in != pe) throw new InternalError("who popped my PE?");
1965         if (!pe.isEOF())
1966             fatal("P-059", new Object[]{in.getName()});
1967     }
1968 
1969     private void readEncoding(boolean must)
1970             throws IOException, SAXException {
1971 
1972         // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1973         String name = maybeReadAttribute("encoding", must);
1974 
1975         if (name == null)
1976             return;
1977         for (int i = 0; i < name.length(); i++) {
1978             char c = name.charAt(i);
1979             if ((c >= 'A' && c <= 'Z')
1980                     || (c >= 'a' && c <= 'z'))
1981                 continue;
1982             if (i != 0
1983                     && ((c >= '0' && c <= '9')
1984                     || c == '-'
1985                     || c == '_'
1986                     || c == '.'
1987                     ))
1988                 continue;
1989             fatal("P-060", new Object[]{new Character(c)});
1990         }
1991 
1992         //
1993         // This should be the encoding in use, and it's even an error for
1994         // it to be anything else (in certain cases that are impractical to
1995         // to test, and may even be insufficient).  So, we do the best we
1996         // can, and warn if things look suspicious.  Note that Java doesn't
1997         // uniformly expose the encodings, and that the names it uses
1998         // internally are nonstandard.  Also, that the XML spec allows
1999         // such "errors" not to be reported at all.
2000         //
2001         String currentEncoding = in.getEncoding();
2002 
2003         if (currentEncoding != null
2004                 && !name.equalsIgnoreCase(currentEncoding))
2005             warning("P-061", new Object[]{name, currentEncoding});
2006     }
2007 
2008     private boolean maybeNotationDecl()
2009             throws IOException, SAXException {
2010 
2011         // [82] NotationDecl ::= '<!NOTATION' S Name S
2012         //        (ExternalID | PublicID) S? '>'
2013         // [83] PublicID ::= 'PUBLIC' S PubidLiteral
2014         InputEntity start = peekDeclaration("!NOTATION");
2015 
2016         if (start == null)
2017             return false;
2018 
2019         String name = getMarkupDeclname("F-019", false);
2020         ExternalEntity entity = new ExternalEntity(in);
2021 
2022         whitespace("F-011");
2023         if (peek("PUBLIC")) {
2024             whitespace("F-009");
2025             entity.publicId = parsePublicId();
2026             if (maybeWhitespace()) {
2027                 if (!peek(">"))
2028                     entity.systemId = parseSystemId();
2029                 else
2030                     ungetc();
2031             }
2032         } else if (peek("SYSTEM")) {
2033             whitespace("F-008");
2034             entity.systemId = parseSystemId();
2035         } else
2036             fatal("P-062");
2037         maybeWhitespace();
2038         nextChar('>', "F-032", name);
2039         if (start != in)
2040             error("V-013", null);
2041         if (entity.systemId != null && entity.systemId.indexOf('#') != -1)
2042             error("P-056", new Object[]{entity.systemId});
2043 
2044         Object value = notations.get(name);
2045         if (value != null && value instanceof ExternalEntity)
2046             warning("P-063", new Object[]{name});
2047 
2048         else {
2049             notations.put(name, entity);
2050             dtdHandler.notationDecl(name, entity.publicId,
2051                     entity.systemId);
2052         }
2053         return true;
2054     }
2055 
2056 
2057     ////////////////////////////////////////////////////////////////
2058     //
2059     //    UTILITIES
2060     //
2061     ////////////////////////////////////////////////////////////////
2062 
2063     private char getc() throws IOException, SAXException {
2064 
2065         if (!doLexicalPE) {
2066             char c = in.getc();
2067             return c;
2068         }
2069 
2070         //
2071         // External parameter entities get funky processing of '%param;'
2072         // references.  It's not clearly defined in the XML spec; but it
2073         // boils down to having those refs be _lexical_ in most cases to
2074         // include partial syntax productions.  It also needs selective
2075         // enabling; "<!ENTITY % foo ...>" must work, for example, and
2076         // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
2077         // if it's expanded in a literal, else "ab  cd".  PEs also do
2078         // not expand within comments or PIs, and external PEs are only
2079         // allowed to have markup decls (and so aren't handled lexically).
2080         //
2081         // This PE handling should be merged into maybeWhitespace, where
2082         // it can be dealt with more consistently.
2083         //
2084         // Also, there are some validity constraints in this area.
2085         //
2086         char c;
2087 
2088         while (in.isEOF()) {
2089             if (in.isInternal() || (doLexicalPE && !in.isDocument()))
2090                 in = in.pop();
2091             else {
2092                 fatal("P-064", new Object[]{in.getName()});
2093             }
2094         }
2095         if ((c = in.getc()) == '%' && doLexicalPE) {
2096             // PE ref ::= '%' name ';'
2097             String name = maybeGetName();
2098             Object entity;
2099 
2100             if (name == null)
2101                 fatal("P-011");
2102             nextChar(';', "F-021", name);
2103             entity = params.get(name);
2104 
2105             // push a magic "entity" before and after the
2106             // real one, so ungetc() behaves uniformly
2107             pushReader(" ".toCharArray(), null, false);
2108             if (entity instanceof InternalEntity)
2109                 pushReader(((InternalEntity) entity).buf, name, false);
2110             else if (entity instanceof ExternalEntity)
2111             // PEs can't be unparsed!
2112             // XXX if this returns false ...
2113                 pushReader((ExternalEntity) entity);
2114             else if (entity == null)
2115             // see note in maybePEReference re making this be nonfatal.
2116                 fatal("V-022");
2117             else
2118                 throw new InternalError();
2119             pushReader(" ".toCharArray(), null, false);
2120             return in.getc();
2121         }
2122         return c;
2123     }
2124 
2125     private void ungetc() {
2126 
2127         in.ungetc();
2128     }
2129 
2130     private boolean peek(String s)
2131             throws IOException, SAXException {
2132 
2133         return in.peek(s, null);
2134     }
2135 
2136     // Return the entity starting the specified declaration
2137     // (for validating declaration nesting) else null.
2138 
2139     private InputEntity peekDeclaration(String s)
2140             throws IOException, SAXException {
2141 
2142         InputEntity start;
2143 
2144         if (!in.peekc('<'))
2145             return null;
2146         start = in;
2147         if (in.peek(s, null))
2148             return start;
2149         in.ungetc();
2150         return null;
2151     }
2152 
2153     private void nextChar(char c, String location, String near)
2154             throws IOException, SAXException {
2155 
2156         while (in.isEOF() && !in.isDocument())
2157             in = in.pop();
2158         if (!in.peekc(c))
2159             fatal("P-008", new Object[]
2160             {new Character(c),
2161              messages.getMessage(locale, location),
2162              (near == null ? "" : ('"' + near + '"'))});
2163     }
2164 
2165 
2166     private void pushReader(char buf [], String name, boolean isGeneral)
2167             throws SAXException {
2168 
2169         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2170         r.init(buf, name, in, !isGeneral);
2171         in = r;
2172     }
2173 
2174     private boolean pushReader(ExternalEntity next)
2175             throws IOException, SAXException {
2176 
2177         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2178         InputSource s;
2179         try {
2180             s = next.getInputSource(resolver);
2181         } catch (IOException e) {
2182             String msg =
2183                     "unable to open the external entity from :" + next.systemId;
2184             if (next.publicId != null)
2185                 msg += " (public id:" + next.publicId + ")";
2186 
2187             SAXParseException spe = new SAXParseException(msg,
2188                     getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
2189             dtdHandler.fatalError(spe);
2190             throw e;
2191         }
2192 
2193         r.init(s, next.name, in, next.isPE);
2194         in = r;
2195         return true;
2196     }
2197 
2198     public String getPublicId() {
2199 
2200         return (in == null) ? null : in.getPublicId();
2201     }
2202 
2203     public String getSystemId() {
2204 
2205         return (in == null) ? null : in.getSystemId();
2206     }
2207 
2208     public int getLineNumber() {
2209 
2210         return (in == null) ? -1 : in.getLineNumber();
2211     }
2212 
2213     public int getColumnNumber() {
2214 
2215         return (in == null) ? -1 : in.getColumnNumber();
2216     }
2217 
2218     // error handling convenience routines
2219 
2220     private void warning(String messageId, Object parameters [])
2221             throws SAXException {
2222 
2223         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2224                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2225 
2226         dtdHandler.warning(e);
2227     }
2228 
2229     void error(String messageId, Object parameters [])
2230             throws SAXException {
2231 
2232         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2233                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2234 
2235         dtdHandler.error(e);
2236     }
2237 
2238     private void fatal(String messageId) throws SAXException {
2239 
2240         fatal(messageId, null);
2241     }
2242 
2243     private void fatal(String messageId, Object parameters [])
2244             throws SAXException {
2245 
2246         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2247                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2248 
2249         dtdHandler.fatalError(e);
2250 
2251         throw e;
2252     }
2253 
2254     //
2255     // Map char arrays to strings ... cuts down both on memory and
2256     // CPU usage for element/attribute/other names that are reused.
2257     //
2258     // Documents typically repeat names a lot, so we more or less
2259     // intern all the strings within the document; since some strings
2260     // are repeated in multiple documents (e.g. stylesheets) we go
2261     // a bit further, and intern globally.
2262     //
2263     static class NameCache {
2264         //
2265         // Unless we auto-grow this, the default size should be a
2266         // reasonable bit larger than needed for most XML files
2267         // we've yet seen (and be prime).  If it's too small, the
2268         // penalty is just excess cache collisions.
2269         //
2270         NameCacheEntry hashtable [] = new NameCacheEntry[541];
2271 
2272         //
2273         // Usually we just want to get the 'symbol' for these chars
2274         //
2275         String lookup(char value [], int len) {
2276 
2277             return lookupEntry(value, len).name;
2278         }
2279 
2280         //
2281         // Sometimes we need to scan the chars in the resulting
2282         // string, so there's an accessor which exposes them.
2283         // (Mostly for element end tags.)
2284         //
2285         NameCacheEntry lookupEntry(char value [], int len) {
2286 
2287             int index = 0;
2288             NameCacheEntry entry;
2289 
2290             // hashing to get index
2291             for (int i = 0; i < len; i++)
2292                 index = index * 31 + value[i];
2293             index &= 0x7fffffff;
2294             index %= hashtable.length;
2295 
2296             // return entry if one's there ...
2297             for (entry = hashtable[index];
2298                  entry != null;
2299                  entry = entry.next) {
2300                 if (entry.matches(value, len))
2301                     return entry;
2302             }
2303 
2304             // else create new one
2305             entry = new NameCacheEntry();
2306             entry.chars = new char[len];
2307             System.arraycopy(value, 0, entry.chars, 0, len);
2308             entry.name = new String(entry.chars);
2309             //
2310             // NOTE:  JDK 1.1 has a fixed size string intern table,
2311             // with non-GC'd entries.  It can panic here; that's a
2312             // JDK problem, use 1.2 or later with many identifiers.
2313             //
2314             entry.name = entry.name.intern();        // "global" intern
2315             entry.next = hashtable[index];
2316             hashtable[index] = entry;
2317             return entry;
2318         }
2319     }
2320 
2321     static class NameCacheEntry {
2322 
2323         String name;
2324         char chars [];
2325         NameCacheEntry next;
2326 
2327         boolean matches(char value [], int len) {
2328 
2329             if (chars.length != len)
2330                 return false;
2331             for (int i = 0; i < len; i++)
2332                 if (value[i] != chars[i])
2333                     return false;
2334             return true;
2335         }
2336     }
2337 
2338     //
2339     // Message catalog for diagnostics.
2340     //
2341     static final Catalog messages = new Catalog();
2342 
2343     static final class Catalog extends MessageCatalog {
2344 
2345         Catalog() {
2346             super(DTDParser.class);
2347         }
2348     }
2349 
2350 }