< prev index next >

src/jdk.xml.bind/share/classes/com/sun/xml/internal/dtdparser/DTDParser.java

Print this page


   1 /*
   2  * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.dtdparser;
  27 
  28 import org.xml.sax.EntityResolver;
  29 import org.xml.sax.InputSource;
  30 import org.xml.sax.Locator;
  31 import org.xml.sax.SAXException;
  32 import org.xml.sax.SAXParseException;
  33 
  34 import java.io.IOException;
  35 import java.util.ArrayList;
  36 import java.util.Enumeration;
  37 import java.util.Hashtable;
  38 import java.util.Locale;
  39 import java.util.Set;
  40 import java.util.Vector;


  41 
  42 /**
  43  * This implements parsing of XML 1.0 DTDs.
  44  * <p/>
  45  * This conforms to the portion of the XML 1.0 specification related
  46  * to the external DTD subset.
  47  * <p/>
  48  * For multi-language applications (such as web servers using XML
  49  * processing to create dynamic content), a method supports choosing
  50  * a locale for parser diagnostics which is both understood by the
  51  * message recipient and supported by the parser.
  52  * <p/>
  53  * This parser produces a stream of parse events.  It supports some
  54  * features (exposing comments, CDATA sections, and entity references)
  55  * which are not required to be reported by conformant XML processors.
  56  *
  57  * @author David Brownell
  58  * @author Janet Koenig
  59  * @author Kohsuke KAWAGUCHI
  60  * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $
  61  */
  62 public class DTDParser {

  63     public final static String TYPE_CDATA = "CDATA";
  64     public final static String TYPE_ID = "ID";
  65     public final static String TYPE_IDREF = "IDREF";
  66     public final static String TYPE_IDREFS = "IDREFS";
  67     public final static String TYPE_ENTITY = "ENTITY";
  68     public final static String TYPE_ENTITIES = "ENTITIES";
  69     public final static String TYPE_NMTOKEN = "NMTOKEN";
  70     public final static String TYPE_NMTOKENS = "NMTOKENS";
  71     public final static String TYPE_NOTATION = "NOTATION";
  72     public final static String TYPE_ENUMERATION = "ENUMERATION";
  73 
  74 
  75     // stack of input entities being merged
  76     private InputEntity in;
  77 
  78     // temporaries reused during parsing
  79     private StringBuffer strTmp;
  80     private char nameTmp [];
  81     private NameCache nameCache;
  82     private char charTmp [] = new char[2];
  83 
  84     // temporary DTD parsing state
  85     private boolean doLexicalPE;
  86 
  87     // DTD state, used during parsing
  88 //    private SimpleHashtable    elements = new SimpleHashtable (47);
  89     protected final Set declaredElements = new java.util.HashSet();
  90     private SimpleHashtable params = new SimpleHashtable(7);
  91 
  92     // exposed to package-private subclass
  93     Hashtable notations = new Hashtable(7);
  94     SimpleHashtable entities = new SimpleHashtable(17);
  95 
  96     private SimpleHashtable ids = new SimpleHashtable();
  97 
  98     // listeners for DTD parsing events
  99     private DTDEventListener dtdHandler;
 100 
 101     private EntityResolver resolver;
 102     private Locale locale;
 103 
 104     // string constants -- use these copies so "==" works
 105     // package private
 106     static final String strANY = "ANY";
 107     static final String strEMPTY = "EMPTY";
 108 


 109     /**
 110      * Used by applications to request locale for diagnostics.
 111      *
 112      * @param l The locale to use, or null to use system defaults
 113      *          (which may include only message IDs).
 114      */
 115     public void setLocale(Locale l) throws SAXException {
 116 
 117         if (l != null && !messages.isLocaleSupported(l.toString())) {
 118             throw new SAXException(messages.getMessage(locale,
 119                     "P-078", new Object[]{l}));
 120         }
 121         locale = l;
 122     }
 123 
 124     /**
 125      * Returns the diagnostic locale.
 126      */
 127     public Locale getLocale() {
 128         return locale;
 129     }
 130 
 131     /**
 132      * Chooses a client locale to use for diagnostics, using the first
 133      * language specified in the list that is supported by this parser.
 134      * That locale is then set using <a href="#setLocale(java.util.Locale)">
 135      * setLocale()</a>.  Such a list could be provided by a variety of user
 136      * preference mechanisms, including the HTTP <em>Accept-Language</em>
 137      * header field.
 138      *
 139      * @param languages Array of language specifiers, ordered with the most
 140      *                  preferable one at the front.  For example, "en-ca" then "fr-ca",
 141      *                  followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
 142      * @return The chosen locale, or null.
 143      * @see MessageCatalog
 144      */
 145     public Locale chooseLocale(String languages [])
 146             throws SAXException {
 147 
 148         Locale l = messages.chooseLocale(languages);
 149 
 150         if (l != null) {
 151             setLocale(l);
 152         }
 153         return l;
 154     }
 155 
 156     /**
 157      * Lets applications control entity resolution.
 158      */
 159     public void setEntityResolver(EntityResolver r) {
 160 
 161         resolver = r;
 162     }
 163 
 164     /**
 165      * Returns the object used to resolve entities
 166      */
 167     public EntityResolver getEntityResolver() {
 168 
 169         return resolver;
 170     }
 171 
 172     /**
 173      * Used by applications to set handling of DTD parsing events.
 174      */
 175     public void setDtdHandler(DTDEventListener handler) {
 176         dtdHandler = handler;
 177         if (handler != null)
 178             handler.setDocumentLocator(new Locator() {

 179                 public String getPublicId() {
 180                     return DTDParser.this.getPublicId();
 181                 }
 182 

 183                 public String getSystemId() {
 184                     return DTDParser.this.getSystemId();
 185                 }
 186 

 187                 public int getLineNumber() {
 188                     return DTDParser.this.getLineNumber();
 189                 }
 190 

 191                 public int getColumnNumber() {
 192                     return DTDParser.this.getColumnNumber();
 193                 }
 194             });
 195     }

 196 
 197     /**
 198      * Returns the handler used to for DTD parsing events.
 199      */
 200     public DTDEventListener getDtdHandler() {
 201         return dtdHandler;
 202     }
 203 
 204     /**
 205      * Parse a DTD.
 206      */
 207     public void parse(InputSource in)
 208             throws IOException, SAXException {
 209         init();
 210         parseInternal(in);
 211     }
 212 
 213     /**
 214      * Parse a DTD.
 215      */
 216     public void parse(String uri)
 217             throws IOException, SAXException {
 218         InputSource in;
 219 
 220         init();
 221         // System.out.println ("parse (\"" + uri + "\")");
 222         in = resolver.resolveEntity(null, uri);
 223 
 224         // If custom resolver punts resolution to parser, handle it ...
 225         if (in == null) {
 226             in = Resolver.createInputSource(new java.net.URL(uri), false);
 227 
 228             // ... or if custom resolver doesn't correctly construct the
 229             // input entity, patch it up enough so relative URIs work, and
 230             // issue a warning to minimize later confusion.
 231         } else if (in.getSystemId() == null) {
 232             warning("P-065", null);
 233             in.setSystemId(uri);
 234         }
 235 
 236         parseInternal(in);
 237     }
 238 
 239     // makes sure the parser is reset to "before a document"
 240     private void init() {
 241         in = null;
 242 
 243         // alloc temporary data used in parsing
 244         strTmp = new StringBuffer();
 245         nameTmp = new char[20];
 246         nameCache = new NameCache();
 247 
 248         // reset doc info
 249 //        isInAttribute = false;
 250 
 251         doLexicalPE = false;
 252 
 253         entities.clear();
 254         notations.clear();
 255         params.clear();
 256         //    elements.clear ();
 257         declaredElements.clear();
 258 
 259         // initialize predefined references ... re-interpreted later
 260         builtin("amp", "&");
 261         builtin("lt", "<");
 262         builtin("gt", ">");
 263         builtin("quot", "\"");
 264         builtin("apos", "'");
 265 
 266         if (locale == null)
 267             locale = Locale.getDefault();
 268         if (resolver == null)

 269             resolver = new Resolver();
 270         if (dtdHandler == null)

 271             dtdHandler = new DTDHandlerBase();
 272     }

 273 
 274     private void builtin(String entityName, String entityValue) {
 275         InternalEntity entity;
 276         entity = new InternalEntity(entityName, entityValue.toCharArray());
 277         entities.put(entityName, entity);
 278     }
 279 
 280 
 281     ////////////////////////////////////////////////////////////////
 282     //
 283     // parsing is by recursive descent, code roughly
 284     // following the BNF rules except tweaked for simple
 285     // lookahead.  rules are more or less in numeric order,
 286     // except where code sharing suggests other structures.
 287     //
 288     // a classic benefit of recursive descent parsers:  it's
 289     // relatively easy to get diagnostics that make sense.
 290     //
 291     ////////////////////////////////////////////////////////////////
 292 
 293 
 294     private void parseInternal(InputSource input)
 295             throws IOException, SAXException {
 296 
 297         if (input == null)
 298             fatal("P-000");

 299 
 300         try {
 301             in = InputEntity.getInputEntity(dtdHandler, locale);
 302             in.init(input, null, null, false);
 303 
 304             dtdHandler.startDTD(in);
 305 
 306             // [30] extSubset ::= TextDecl? extSubsetDecl
 307             // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
 308             //        | PEReference | S )*
 309             //    ... same as [79] extPE, which is where the code is
 310 
 311             ExternalEntity externalSubset = new ExternalEntity(in);
 312             externalParameterEntity(externalSubset);
 313 
 314             if (!in.isEOF()) {
 315                 fatal("P-001", new Object[]
 316                 {Integer.toHexString(((int) getc()))});
 317             }
 318             afterRoot();
 319             dtdHandler.endDTD();
 320 
 321         } catch (EndOfInputException e) {
 322             if (!in.isDocument()) {
 323                 String name = in.getName();
 324                 do {    // force a relevant URI and line number
 325                     in = in.pop();
 326                 } while (in.isInternal());
 327                 fatal("P-002", new Object[]{name});
 328             } else {
 329                 fatal("P-003", null);
 330             }
 331         } catch (RuntimeException e) {
 332             // Don't discard location that triggered the exception
 333             // ## Should properly wrap exception
 334             System.err.print("Internal DTD parser error: "); // ##
 335             e.printStackTrace();
 336             throw new SAXParseException(e.getMessage() != null
 337                     ? e.getMessage() : e.getClass().getName(),
 338                     getPublicId(), getSystemId(),
 339                     getLineNumber(), getColumnNumber());
 340 
 341         } finally {
 342             // recycle temporary data used during parsing
 343             strTmp = null;
 344             nameTmp = null;
 345             nameCache = null;
 346 
 347             // ditto input sources etc
 348             if (in != null) {
 349                 in.close();
 350                 in = null;
 351             }
 352 
 353             // get rid of all DTD info ... some of it would be
 354             // useful for editors etc, investigate later.
 355 
 356             params.clear();
 357             entities.clear();
 358             notations.clear();
 359             declaredElements.clear();
 360 //        elements.clear();
 361             ids.clear();
 362         }
 363     }
 364 
 365     void afterRoot() throws SAXException {
 366         // Make sure all IDREFs match declared ID attributes.  We scan
 367         // after the document element is parsed, since XML allows forward
 368         // references, and only now can we know if they're all resolved.
 369 
 370         for (Enumeration e = ids.keys();
 371              e.hasMoreElements();
 372                 ) {
 373             String id = (String) e.nextElement();
 374             Boolean value = (Boolean) ids.get(id);
 375             if (Boolean.FALSE == value)
 376                 error("V-024", new Object[]{id});
 377         }
 378     }
 379 
 380 
 381     // role is for diagnostics
 382     private void whitespace(String roleId)
 383             throws IOException, SAXException {
 384 
 385         // [3] S ::= (#x20 | #x9 | #xd | #xa)+
 386         if (!maybeWhitespace()) {
 387             fatal("P-004", new Object[]
 388             {messages.getMessage(locale, roleId)});
 389         }
 390     }
 391 
 392     // S?
 393     private boolean maybeWhitespace()
 394             throws IOException, SAXException {
 395 
 396         if (!doLexicalPE)
 397             return in.maybeWhitespace();

 398 
 399         // see getc() for the PE logic -- this lets us splice
 400         // expansions of PEs in "anywhere".  getc() has smarts,
 401         // so for external PEs we don't bypass it.
 402 
 403         // XXX we can marginally speed PE handling, and certainly
 404         // be cleaner (hence potentially more correct), by using
 405         // the observations that expanded PEs only start and stop
 406         // where whitespace is allowed.  getc wouldn't need any
 407         // "lexical" PE expansion logic, and no other method needs
 408         // to handle termination of PEs.  (parsing of literals would
 409         // still need to pop entities, but not parsing of references
 410         // in content.)
 411 
 412         char c = getc();
 413         boolean saw = false;
 414 
 415         while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
 416             saw = true;
 417 
 418             // this gracefully ends things when we stop playing
 419             // with internal parameters.  caller should have a
 420             // grammar rule allowing whitespace at end of entity.
 421             if (in.isEOF() && !in.isInternal())
 422                 return saw;

 423             c = getc();
 424         }
 425         ungetc();
 426         return saw;
 427     }
 428 
 429     private String maybeGetName()
 430             throws IOException, SAXException {
 431 
 432         NameCacheEntry entry = maybeGetNameCacheEntry();
 433         return (entry == null) ? null : entry.name;
 434     }
 435 
 436     private NameCacheEntry maybeGetNameCacheEntry()
 437             throws IOException, SAXException {
 438 
 439         // [5] Name ::= (Letter|'_'|':') (Namechar)*
 440         char c = getc();
 441 
 442         if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
 443             ungetc();
 444             return null;
 445         }
 446         return nameCharString(c);
 447     }
 448 
 449     // Used when parsing enumerations
 450     private String getNmtoken()
 451             throws IOException, SAXException {
 452 
 453         // [7] Nmtoken ::= (Namechar)+
 454         char c = getc();
 455         if (!XmlChars.isNameChar(c))
 456             fatal("P-006", new Object[]{new Character(c)});

 457         return nameCharString(c).name;
 458     }
 459 
 460     // n.b. this gets used when parsing attribute values (for
 461     // internal references) so we can't use strTmp; it's also
 462     // a hotspot for CPU and memory in the parser (called at least
 463     // once for each element) so this has been optimized a bit.
 464 
 465     private NameCacheEntry nameCharString(char c)
 466             throws IOException, SAXException {
 467 
 468         int i = 1;
 469 
 470         nameTmp[0] = c;
 471         for (; ;) {
 472             if ((c = in.getNameChar()) == 0)
 473                 break;

 474             if (i >= nameTmp.length) {
 475                 char tmp [] = new char[nameTmp.length + 10];
 476                 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
 477                 nameTmp = tmp;
 478             }
 479             nameTmp[i++] = c;
 480         }
 481         return nameCache.lookupEntry(nameTmp, i);
 482     }
 483 
 484     //
 485     // much similarity between parsing entity values in DTD
 486     // and attribute values (in DTD or content) ... both follow
 487     // literal parsing rules, newline canonicalization, etc
 488     //
 489     // leaves value in 'strTmp' ... either a "replacement text" (4.5),
 490     // or else partially normalized attribute value (the first bit
 491     // of 3.3.3's spec, without the "if not CDATA" bits).
 492     //

 493     private void parseLiteral(boolean isEntityValue)
 494             throws IOException, SAXException {
 495 
             // Reads one quoted literal and leaves its text in strTmp.
             // isEntityValue == true  (entity value): general entity references
             //     are copied through unexpanded as "&name;", and parameter
             //     entity references ("%name;") are expanded.
             // isEntityValue == false (attribute value): general entity
             //     references are expanded, each whitespace character becomes a
             //     single space, and '<' is reported through fatal("P-012").
             // Character references ("&#...;") are resolved in both modes.
 496         // [9] EntityValue ::=
 497         //    '"' ([^"&%] | Reference | PEReference)* '"'
 498         //    |    "'" ([^'&%] | Reference | PEReference)* "'"
 499         // [10] AttValue ::=
 500         //    '"' ([^"&]  | Reference             )* '"'
 501         //    |    "'" ([^'&]  | Reference             )* "'"
 502         char quote = getc();
 503         char c;
             // Remember the entity the literal started in: only a quote read
             // while 'in' is this same entity may close the literal.
 504         InputEntity source = in;
 505 
 506         if (quote != '\'' && quote != '"') {
 507             fatal("P-007");
 508         }
 509 
 510         // don't report entity expansions within attributes,
 511         // they're reported "fully expanded" via SAX
 512 //    isInAttribute = !isEntityValue;
 513 
 514         // get value into strTmp
 515         strTmp = new StringBuffer();
 516 
 517         // scan, allowing entity push/pop wherever ...
 518         // expanded entities can't terminate the literal!
 519         for (; ;) {
 520             if (in != source && in.isEOF()) {
 521                 // we don't report end of parsed entities
 522                 // within attributes (no SAX hooks)
 523                 in = in.pop();
 524                 continue;
 525             }
 526             if ((c = getc()) == quote && in == source) {
 527                 break;
 528             }
 529 
 530             //
 531             // Basically the "reference in attribute value"
 532             // row of the chart in section 4.4 of the spec
 533             //
 534             if (c == '&') {
 535                 String entityName = maybeGetName();
 536 
 537                 if (entityName != null) {
 538                     nextChar(';', "F-020", entityName);
 539 
 540                     // 4.4 says:  bypass these here ... we'll catch
 541                     // forbidden refs to unparsed entities on use
 542                     if (isEntityValue) {
 543                         strTmp.append('&');
 544                         strTmp.append(entityName);
 545                         strTmp.append(';');
 546                         continue;
 547                     }
 548                     expandEntityInLiteral(entityName, entities, isEntityValue);
 549 
 550 
 551                     // character references are always included immediately
 552                 } else if ((c = getc()) == '#') {
 553                     int tmp = parseCharNumber();
 554 
 555                     if (tmp > 0xffff) {
                             // Beyond the 16-bit range: surrogatesToCharTmp fills
                             // charTmp; its result is presumably the number of
                             // chars stored there (1 or 2) -- TODO confirm.
 556                         tmp = surrogatesToCharTmp(tmp);
 557                         strTmp.append(charTmp[0]);
 558                         if (tmp == 2)
 559                             strTmp.append(charTmp[1]);
 560                     } else

 561                         strTmp.append((char) tmp);
 562                 } else

 563                     fatal("P-009");

 564                 continue;
 565 
 566             }
 567 
 568             // expand parameter entities only within entity value literals
 569             if (c == '%' && isEntityValue) {
 570                 String entityName = maybeGetName();
 571 
 572                 if (entityName != null) {
 573                     nextChar(';', "F-021", entityName);
 574                     expandEntityInLiteral(entityName, params, isEntityValue);
 575                     continue;
 576                 } else
 577                     fatal("P-011");
 578             }

 579 
 580             // For attribute values ...
 581             if (!isEntityValue) {
 582                 // 3.3.3 says whitespace normalizes to space...
 583                 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
 584                     strTmp.append(' ');
 585                     continue;
 586                 }
 587 
 588                 // "<" not legal in parsed literals ...
 589                 if (c == '<')
 590                     fatal("P-012");
 591             }

 592 
                 // ordinary character: copy it through unchanged
 593             strTmp.append(c);
 594         }
 595 //    isInAttribute = false;
 596     }
 597 
 598     // does a SINGLE expansion of the entity (often reparsed later)
 599     private void expandEntityInLiteral(String name, SimpleHashtable table,
 600                                        boolean isEntityValue)
 601             throws IOException, SAXException {
 602 
 603         Object entity = table.get(name);
 604 
 605         if (entity instanceof InternalEntity) {
 606             InternalEntity value = (InternalEntity) entity;
 607             pushReader(value.buf, name, !value.isPE);
 608 
 609         } else if (entity instanceof ExternalEntity) {
 610             if (!isEntityValue)    // must be a PE ...

 611                 fatal("P-013", new Object[]{name});

 612             // XXX if this returns false ...
 613             pushReader((ExternalEntity) entity);
 614 
 615         } else if (entity == null) {
 616             //
 617             // Note:  much confusion about whether spec requires such
 618             // errors to be fatal in many cases, but none about whether
 619             // it allows "normal" errors to be unrecoverable!
 620             //
 621             fatal((table == params) ? "V-022" : "P-014",
 622                     new Object[]{name});
 623         }
 624     }
 625 
 626     // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
 627     // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
 628 
 629     // NOTE:  XML spec should explicitly say that PE ref syntax is
 630     // ignored in PIs, comments, SystemLiterals, and Pubid Literal
 631     // values ... can't process the XML spec's own DTD without doing
 632     // that for comments.
 633 
 634     private String getQuotedString(String type, String extra)
 635             throws IOException, SAXException {
 636 
 637         // use in.getc to bypass PE processing
 638         char quote = in.getc();
 639 
 640         if (quote != '\'' && quote != '"')
 641             fatal("P-015", new Object[]{
 642                 messages.getMessage(locale, type, new Object[]{extra})
 643             });

 644 
 645         char c;
 646 
 647         strTmp = new StringBuffer();
 648         while ((c = in.getc()) != quote)
 649             strTmp.append((char) c);

 650         return strTmp.toString();
 651     }
 652 
 653 
 654     private String parsePublicId() throws IOException, SAXException {
 655 
 656         // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
 657         // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
 658         String retval = getQuotedString("F-033", null);
 659         for (int i = 0; i < retval.length(); i++) {
 660             char c = retval.charAt(i);
 661             if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
 662                     && !(c >= 'A' && c <= 'Z')
 663                     && !(c >= 'a' && c <= 'z'))
 664                 fatal("P-016", new Object[]{new Character(c)});

 665         }
 666         strTmp = new StringBuffer();
 667         strTmp.append(retval);
 668         return normalize(false);
 669     }
 670 
 671     // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 672     // handled by:  InputEntity.parsedContent()
 673 
 674     private boolean maybeComment(boolean skipStart)
 675             throws IOException, SAXException {
 676 
             // Consumes a comment if one starts at the current position.
             // skipStart: true when the caller has already consumed the '<',
             //     so only "!--" is looked for.
             // Returns false (consuming nothing) when no comment starts here,
             // true once the closing "-->" has been read.
 677         // [15] Comment ::= '<!--'
 678         //        ( (Char - '-') | ('-' (Char - '-')) )*
 679         //        '-->'
 680         if (!in.peek(skipStart ? "!--" : "<!--", null))
 681             return false;

 682 
 683         boolean savedLexicalPE = doLexicalPE;
 684         boolean saveCommentText;
 685 
             // PE expansion is switched off while inside the comment and
             // restored at the end.
 686         doLexicalPE = false;
             // Hard-wired to false: as written, the comment text is never
             // collected and dtdHandler.comment() is never invoked.
 687         saveCommentText = false;
 688         if (saveCommentText)
 689             strTmp = new StringBuffer();

 690 
 691         oneComment:
 692         for (; ;) {
 693             try {
 694                 // bypass PE expansion, but permit PEs
 695                 // to complete ... valid docs won't care.
 696                 for (; ;) {
 697                     int c = getc();
 698                     if (c == '-') {
 699                         c = getc();
 700                         if (c != '-') {
 701                             if (saveCommentText)
 702                                 strTmp.append('-');

                                 // single '-': push the following char back and rescan it
 703                             ungetc();
 704                             continue;
 705                         }
                             // "--" must be followed immediately by '>'
 706                         nextChar('>', "F-022", null);
 707                         break oneComment;
 708                     }
 709                     if (saveCommentText)
 710                         strTmp.append((char) c);
 711                 }

 712             } catch (EndOfInputException e) {
 713                 //
 714                 // This is fatal EXCEPT when we're processing a PE...
 715                 // in which case a validating processor reports an error.
 716                 // External PEs are easy to detect; internal ones we
 717                 // infer by being an internal entity outside an element.
 718                 //
 719                 if (in.isInternal()) {
 720                     error("V-021", null);
 721                 }
 722                 fatal("P-017");
 723             }
 724         }
 725         doLexicalPE = savedLexicalPE;
 726         if (saveCommentText)
 727             dtdHandler.comment(strTmp.toString());

 728         return true;
 729     }
 730 
 731     private boolean maybePI(boolean skipStart)
 732             throws IOException, SAXException {
 733 
 734         // [16] PI ::= '<?' PITarget
 735         //        (S (Char* - (Char* '?>' Char*)))?
 736         //        '?>'
 737         // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
 738         boolean savedLexicalPE = doLexicalPE;
 739 
 740         if (!in.peek(skipStart ? "?" : "<?", null))
 741             return false;

 742         doLexicalPE = false;
 743 
 744         String target = maybeGetName();
 745 
 746         if (target == null) {
 747             fatal("P-018");
 748         }
 749         if ("xml".equals(target)) {
 750             fatal("P-019");
 751         }
 752         if ("xml".equalsIgnoreCase(target)) {
 753             fatal("P-020", new Object[]{target});
 754         }
 755 
 756         if (maybeWhitespace()) {
 757             strTmp = new StringBuffer();
 758             try {
 759                 for (; ;) {
 760                     // use in.getc to bypass PE processing
 761                     char c = in.getc();
 762                     //Reached the end of PI.
 763                     if (c == '?' && in.peekc('>'))
 764                         break;

 765                     strTmp.append(c);
 766                 }
 767             } catch (EndOfInputException e) {
 768                 fatal("P-021");
 769             }
 770             dtdHandler.processingInstruction(target, strTmp.toString());
 771         } else {
 772             if (!in.peek("?>", null)) {
 773                 fatal("P-022");
 774             }
 775             dtdHandler.processingInstruction(target, "");
 776         }
 777 
 778         doLexicalPE = savedLexicalPE;
 779         return true;
 780     }
 781 
 782     // [18] CDSect ::= CDStart CData CDEnd
 783     // [19] CDStart ::= '<![CDATA['
 784     // [20] CData ::= (Char* - (Char* ']]>' Char*))
 785     // [21] CDEnd ::= ']]>'
 786     //
 787     //    ... handled by InputEntity.unparsedContent()
 788 
 789     // collapsing several rules together ...
 790     // simpler than attribute literals -- no reference parsing!
 791     private String maybeReadAttribute(String name, boolean must)
 792             throws IOException, SAXException {
 793 
 794         // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
 795         // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
 796         // [32] SDDecl ::=  S 'standalone' Eq \'|\" ... \'|\"
 797         if (!maybeWhitespace()) {
 798             if (!must) {
 799                 return null;
 800             }
 801             fatal("P-024", new Object[]{name});
 802             // NOTREACHED
 803         }
 804 
 805         if (!peek(name)) {
 806             if (must) {
 807                 fatal("P-024", new Object[]{name});
 808             } else {


 812                 ungetc();
 813                 return null;
 814             }
 815         }
 816 
 817         // [25] Eq ::= S? '=' S?
 818         maybeWhitespace();
 819         nextChar('=', "F-023", null);
 820         maybeWhitespace();
 821 
 822         return getQuotedString("F-035", name);
 823     }
 824 
 825     private void readVersion(boolean must, String versionNum)
 826             throws IOException, SAXException {
 827 
 828         String value = maybeReadAttribute("version", must);
 829 
 830         // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
 831 
 832         if (must && value == null)
 833             fatal("P-025", new Object[]{versionNum});

 834         if (value != null) {
 835             int length = value.length();
 836             for (int i = 0; i < length; i++) {
 837                 char c = value.charAt(i);
 838                 if (!((c >= '0' && c <= '9')
 839                         || c == '_' || c == '.'
 840                         || (c >= 'a' && c <= 'z')
 841                         || (c >= 'A' && c <= 'Z')
 842                         || c == ':' || c == '-')
 843                 )
 844                     fatal("P-026", new Object[]{value});
 845             }
 846         }
 847         if (value != null && !value.equals(versionNum))

 848             error("P-027", new Object[]{versionNum, value});
 849     }

 850 
 851     // common code used by most markup declarations
 852     // ... S (Q)Name ...
 853     private String getMarkupDeclname(String roleId, boolean qname)
 854             throws IOException, SAXException {
 855 
 856         String name;
 857 
 858         whitespace(roleId);
 859         name = maybeGetName();
 860         if (name == null)
 861             fatal("P-005", new Object[]
 862             {messages.getMessage(locale, roleId)});
 863         return name;
 864     }
 865 
 866     private boolean maybeMarkupDecl()
 867             throws IOException, SAXException {
 868 
 869         // [29] markupdecl ::= elementdecl | Attlistdecl
 870         //           | EntityDecl | NotationDecl | PI | Comment
 871         return maybeElementDecl()
 872                 || maybeAttlistDecl()
 873                 || maybeEntityDecl()
 874                 || maybeNotationDecl()
 875                 || maybePI(false)
 876                 || maybeComment(false);
 877     }
 878 
 879     private static final String XmlLang = "xml:lang";
 880 
 881     private boolean isXmlLang(String value) {
 882 
 883         // [33] LanguageId ::= Langcode ('-' Subcode)*
 884         // [34] Langcode ::= ISO639Code | IanaCode | UserCode
 885         // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
 886         // [36] IanaCode ::= [iI] '-' SubCode
 887         // [37] UserCode ::= [xX] '-' SubCode
 888         // [38] SubCode ::= [a-zA-Z]+
 889 
 890         // the ISO and IANA codes (and subcodes) are registered,
 891         // but that's neither a WF nor a validity constraint.
 892 
 893         int nextSuffix;
 894         char c;
 895 
 896         if (value.length() < 2)
 897             return false;

 898         c = value.charAt(1);
 899         if (c == '-') {        // IANA, or user, code
 900             c = value.charAt(0);
 901             if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
 902                 return false;

 903             nextSuffix = 1;
 904         } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
 905             // 2 letter ISO code, or error
 906             c = value.charAt(0);
 907             if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
 908                 return false;

 909             nextSuffix = 2;
 910         } else
 911             return false;

 912 
 913         // here "suffix" ::= '-' [a-zA-Z]+ suffix*
 914         while (nextSuffix < value.length()) {
 915             c = value.charAt(nextSuffix);
 916             if (c != '-')
 917                 break;

 918             while (++nextSuffix < value.length()) {
 919                 c = value.charAt(nextSuffix);
 920                 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
 921                     break;
 922             }
 923         }

 924         return value.length() == nextSuffix && c != '-';
 925     }
 926 
 927 
 928     //
 929     // CHAPTER 3:  Logical Structures
 930     //
 931 
 932     /**
 933      * To validate, subclassers should at this time make sure that
 934      * values are of the declared types:<UL>
 935      * <LI> ID and IDREF(S) values are Names
 936      * <LI> NMTOKEN(S) are Nmtokens
 937      * <LI> ENUMERATION values match one of the tokens
 938      * <LI> NOTATION values match a notation name
 939      * <LI> ENTITY(IES) values match an unparsed external entity
 940      * </UL>
 941      * <p/>
 942      * <P> Separately, make sure IDREF values match some ID
 943      * provided in the document (in the afterRoot method).
 944      */
 945 /*    void validateAttributeSyntax (Attribute attr, String value)
 946          throws DTDParseException {
 947         // ID, IDREF(S) ... values are Names
 948         if (Attribute.ID == attr.type()) {
 949             if (!XmlNames.isName (value))
 950                 error ("V-025", new Object [] { value });
 951 
 952             Boolean             b = (Boolean) ids.getNonInterned (value);
 953             if (b == null || b.equals (Boolean.FALSE))
 954                 ids.put (value.intern (), Boolean.TRUE);
 955             else
 956                 error ("V-026", new Object [] { value });
 957 
 958         } else if (Attribute.IDREF == attr.type()) {
 959             if (!XmlNames.isName (value))
 960                 error ("V-027", new Object [] { value });
 961 
 962             Boolean             b = (Boolean) ids.getNonInterned (value);
 963             if (b == null)
 964                 ids.put (value.intern (), Boolean.FALSE);
 965 


1028             if (!isUnparsedEntity (value))
1029                 error ("V-031", new Object [] { value });
1030 
1031         } else if (Attribute.ENTITIES == attr.type()) {
1032             StringTokenizer     tokenizer = new StringTokenizer (value);
1033             boolean             sawValue = false;
1034 
1035             while (tokenizer.hasMoreTokens ()) {
1036                 value = tokenizer.nextToken ();
1037                 // see note above re standalone
1038                 if (!isUnparsedEntity (value))
1039                     error ("V-031", new Object [] { value });
1040                 sawValue = true;
1041             }
1042             if (!sawValue)
1043                 error ("V-040", null);
1044 
1045         } else if (Attribute.CDATA != attr.type())
1046             throw new InternalError (attr.type());
1047     }
1048 */
1049 /*
1050     private boolean isUnparsedEntity (String name)
1051     {
1052         Object e = entities.getNonInterned (name);
1053         if (e == null || !(e instanceof ExternalEntity))
1054             return false;
1055         return ((ExternalEntity)e).notation != null;
1056     }
1057 */
1058     private boolean maybeElementDecl()
1059             throws IOException, SAXException {
1060 
1061         // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1062         // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1063         InputEntity start = peekDeclaration("!ELEMENT");
1064 
1065         if (start == null)
1066             return false;

1067 
1068         // n.b. for content models where inter-element whitespace is
1069         // ignorable, we mark that fact here.
1070         String name = getMarkupDeclname("F-015", true);
1071 //    Element        element = (Element) elements.get (name);
1072 //    boolean        declEffective = false;
1073 
1074 /*
1075     if (element != null) {
1076         if (element.contentModel() != null) {
1077             error ("V-012", new Object [] { name });
1078         } // else <!ATTLIST name ...> came first
1079     } else {
1080         element = new Element(name);
1081         elements.put (element.name(), element);
1082         declEffective = true;
1083     }
1084 */
1085         if (declaredElements.contains(name))
1086             error("V-012", new Object[]{name});
1087         else {
1088             declaredElements.add(name);
1089 //        declEffective = true;
1090         }
1091 
1092         short modelType;
1093         whitespace("F-000");
1094         if (peek(strEMPTY)) {
1095 ///        // leave element.contentModel as null for this case.
1096             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
1097         } else if (peek(strANY)) {
1098 ///        element.setContentModel(new StringModel(StringModelType.ANY));
1099             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
1100         } else {
1101             modelType = getMixedOrChildren(name);
1102         }
1103 
1104         dtdHandler.endContentModel(name, modelType);
1105 
1106         maybeWhitespace();
1107         char c = getc();
1108         if (c != '>')
1109             fatal("P-036", new Object[]{name, new Character(c)});
1110         if (start != in)

1111             error("V-013", null);

1112 
1113 ///        dtdHandler.elementDecl(element);
1114 
1115         return true;
1116     }
1117 
1118     // We're leaving the content model as a regular expression;
1119     // it's an efficient natural way to express such things, and
1120     // libraries often interpret them.  No whitespace in the
1121     // model we store, though!
1122 
1123     /**
1124      * returns content model type.
1125      */
1126     private short getMixedOrChildren(String elementName/*Element element*/)
1127             throws IOException, SAXException {
1128 
1129         InputEntity start;
1130 
1131         // [47] children ::= (choice|seq) ('?'|'*'|'+')?
1132         strTmp = new StringBuffer();
1133 
1134         nextChar('(', "F-028", elementName);
1135         start = in;
1136         maybeWhitespace();
1137         strTmp.append('(');
1138 
1139         short modelType;
1140         if (peek("#PCDATA")) {
1141             strTmp.append("#PCDATA");
1142             dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);


1169             String tag;
1170 
1171             tag = maybeGetName();
1172             if (tag != null) {
1173                 strTmp.append(tag);
1174 //                temp = new ElementModel(tag);
1175 //                getFrequency((RepeatableContent)temp);
1176 ///->
1177                 dtdHandler.childElement(tag, getFrequency());
1178 ///<-
1179             } else if (peek("(")) {
1180                 InputEntity next = in;
1181                 strTmp.append('(');
1182                 maybeWhitespace();
1183 //                temp = getcps(element, next);
1184 //                getFrequency(temp);
1185 ///->
1186                 getcps(elementName, next);
1187 ///                getFrequency();        <- this looks like a bug
1188 ///<-
1189             } else
1190                 fatal((type == 0) ? "P-039" :
1191                         ((type == ',') ? "P-037" : "P-038"),
1192                         new Object[]{new Character(getc())});

1193 
1194             maybeWhitespace();
1195             if (decided) {
1196                 char c = getc();
1197 
1198 //                if (current != null) {
1199 //                    current.addChild(temp);
1200 //                }
1201                 if (c == type) {
1202                     strTmp.append(type);
1203                     maybeWhitespace();
1204                     reportConnector(type);
1205                     continue;
1206                 } else if (c == '\u0029') {    // rparen
1207                     ungetc();
1208                     continue;
1209                 } else {
1210                     fatal((type == 0) ? "P-041" : "P-040",
1211                             new Object[]{
1212                                 new Character(c),
1213                                 new Character(type)
1214                             });
1215                 }
1216             } else {
1217                 type = getc();
1218                 switch (type) {
1219                 case '|':
1220                 case ',':
1221                     reportConnector(type);
1222                     break;
1223                 default:
1224 //                        retval = temp;
1225                     ungetc();
1226                     continue;
1227                 }
1228 //                retval = (ContentModel)current;
1229                 decided = true;
1230 //                current.addChild(temp);
1231                 strTmp.append(type);
1232             }
1233             maybeWhitespace();
1234         } while (!peek(")"));
1235 
1236         if (in != start)
1237             error("V-014", new Object[]{elementName});

1238         strTmp.append(')');
1239 
1240         dtdHandler.endModelGroup(getFrequency());
1241 //        return retval;
1242     }
1243 
1244     private void reportConnector(char type) throws SAXException {
1245         switch (type) {
1246         case '|':
1247             dtdHandler.connector(DTDEventListener.CHOICE);    ///<-
1248             return;
1249         case ',':
1250             dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
1251             return;
1252         default:
1253             throw new Error();    //assertion failed.
1254         }
1255     }
1256 
1257     private short getFrequency()


1269             //        original.setRepeat(Repeat.ONE_OR_MORE);
1270         } else if (c == '*') {
1271             strTmp.append(c);
1272             return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
1273             //        original.setRepeat(Repeat.ZERO_OR_MORE);
1274         } else {
1275             ungetc();
1276             return DTDEventListener.OCCURENCE_ONCE;
1277         }
1278     }
1279 
1280     // '(' S? '#PCDATA' already consumed
1281     // matching ')' must be in "start" entity if validating
1282     private void getMixed(String elementName, /*Element element,*/ InputEntity start)
1283             throws IOException, SAXException {
1284 
1285         // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
1286         //        | '(' S? '#PCDATA'                   S? ')'
1287         maybeWhitespace();
1288         if (peek("\u0029*") || peek("\u0029")) {
1289             if (in != start)
1290                 error("V-014", new Object[]{elementName});

1291             strTmp.append(')');
1292 //            element.setContentModel(new StringModel(StringModelType.PCDATA));
1293             return;
1294         }
1295 
1296         ArrayList l = new ArrayList();
1297 //    l.add(new StringModel(StringModelType.PCDATA));
1298 
1299 
1300         while (peek("|")) {
1301             String name;
1302 
1303             strTmp.append('|');
1304             maybeWhitespace();
1305 
1306             doLexicalPE = true;
1307             name = maybeGetName();
1308             if (name == null)
1309                 fatal("P-042", new Object[]
1310                 {elementName, Integer.toHexString(getc())});
1311             if (l.contains(name)) {
1312                 error("V-015", new Object[]{name});
1313             } else {
1314                 l.add(name);
1315                 dtdHandler.mixedElement(name);
1316             }
1317             strTmp.append(name);
1318             maybeWhitespace();
1319         }
1320 
1321         if (!peek("\u0029*"))    // right paren
1322             fatal("P-043", new Object[]
1323             {elementName, new Character(getc())});
1324         if (in != start)

1325             error("V-014", new Object[]{elementName});

1326         strTmp.append(')');
1327 //        ChoiceModel cm = new ChoiceModel((Collection)l);
1328 //    cm.setRepeat(Repeat.ZERO_OR_MORE);
1329 //       element.setContentModel(cm);
1330     }
1331 
1332     private boolean maybeAttlistDecl()
1333             throws IOException, SAXException {
1334 
1335         // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1336         InputEntity start = peekDeclaration("!ATTLIST");
1337 
1338         if (start == null)
1339             return false;

1340 
1341         String elementName = getMarkupDeclname("F-016", true);
1342 //    Element    element = (Element) elements.get (name);
1343 
1344 //    if (element == null) {
1345 //        // not yet declared -- no problem.
1346 //        element = new Element(name);
1347 //        elements.put(name, element);
1348 //    }
1349 
1350         while (!peek(">")) {
1351 
1352             // [53] AttDef ::= S Name S AttType S DefaultDecl
1353             // [54] AttType ::= StringType | TokenizedType | EnumeratedType
1354 
1355             // look for global attribute definitions, don't expand for now...
1356             maybeWhitespace();
1357             char c = getc();
1358             if (c == '%') {
1359                 String entityName = maybeGetName();
1360                 if (entityName != null) {
1361                     nextChar(';', "F-021", entityName);
1362                     whitespace("F-021");
1363                     continue;
1364                 } else
1365                     fatal("P-011");
1366             }

1367 
1368             ungetc();
1369             // look for attribute name otherwise
1370             String attName = maybeGetName();
1371             if (attName == null) {
1372                 fatal("P-044", new Object[]{new Character(getc())});
1373             }
1374             whitespace("F-001");
1375 
1376 ///        Attribute    a = new Attribute (name);
1377 
1378             String typeName;
1379             Vector values = null;    // notation/enumeration values
1380 
1381             // Note:  use the type constants from Attribute
1382             // so that "==" may be used (faster)
1383 
1384             // [55] StringType ::= 'CDATA'
1385             if (peek(TYPE_CDATA))
1386 ///            a.setType(Attribute.CDATA);
1387                 typeName = TYPE_CDATA;
1388 
1389             // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
1390             //        | 'ENTITY' | 'ENTITIES'
1391             //        | 'NMTOKEN' | 'NMTOKENS'
1392             // n.b. if "IDREFS" is there, both "ID" and "IDREF"
1393             // match peekahead ... so this order matters!
1394             else if (peek(TYPE_IDREFS))
1395                 typeName = TYPE_IDREFS;
1396             else if (peek(TYPE_IDREF))
1397                 typeName = TYPE_IDREF;
1398             else if (peek(TYPE_ID)) {
1399                 typeName = TYPE_ID;
1400 // TODO: should implement this error check?
1401 ///        if (element.id() != null) {
1402 ///                    error ("V-016", new Object [] { element.id() });
1403 ///        } else
1404 ///            element.setId(name);
1405             } else if (peek(TYPE_ENTITY))
1406                 typeName = TYPE_ENTITY;
1407             else if (peek(TYPE_ENTITIES))
1408                 typeName = TYPE_ENTITIES;
1409             else if (peek(TYPE_NMTOKENS))
1410                 typeName = TYPE_NMTOKENS;
1411             else if (peek(TYPE_NMTOKEN))
1412                 typeName = TYPE_NMTOKEN;
1413 
1414             // [57] EnumeratedType ::= NotationType | Enumeration
1415             // [58] NotationType ::= 'NOTATION' S '(' S? Name
1416             //        (S? '|' S? Name)* S? ')'
1417             else if (peek(TYPE_NOTATION)) {
1418                 typeName = TYPE_NOTATION;
1419                 whitespace("F-002");
1420                 nextChar('(', "F-029", null);
1421                 maybeWhitespace();
1422 
1423                 values = new Vector();
1424                 do {
1425                     String name;
1426                     if ((name = maybeGetName()) == null)
1427                         fatal("P-068");

1428                     // permit deferred declarations
1429                     if (notations.get(name) == null)
1430                         notations.put(name, name);

1431                     values.addElement(name);
1432                     maybeWhitespace();
1433                     if (peek("|"))
1434                         maybeWhitespace();

1435                 } while (!peek(")"));
1436 ///            a.setValues(new String [v.size ()]);
1437 ///            for (int i = 0; i < v.size (); i++)
1438 ///                a.setValue(i, (String)v.elementAt(i));
1439 
1440                 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
1441             } else if (peek("(")) {
1442 ///            a.setType(Attribute.ENUMERATION);
1443                 typeName = TYPE_ENUMERATION;
1444 
1445                 maybeWhitespace();
1446 
1447 ///            Vector v = new Vector ();
1448                 values = new Vector();
1449                 do {
1450                     String name = getNmtoken();
1451 ///                v.addElement (name);
1452                     values.addElement(name);
1453                     maybeWhitespace();
1454                     if (peek("|"))
1455                         maybeWhitespace();

1456                 } while (!peek(")"));
1457 ///            a.setValues(new String [v.size ()]);
1458 ///            for (int i = 0; i < v.size (); i++)
1459 ///                a.setValue(i, (String)v.elementAt(i));
1460             } else {
1461                 fatal("P-045",
1462                         new Object[]{attName, new Character(getc())});
1463                 typeName = null;
1464             }
1465 
1466             short attributeUse;
1467             String defaultValue = null;
1468 
1469             // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1470             //        | (('#FIXED' S)? AttValue)
1471             whitespace("F-003");
1472             if (peek("#REQUIRED"))
1473                 attributeUse = DTDEventListener.USE_REQUIRED;
1474 ///            a.setIsRequired(true);
1475             else if (peek("#FIXED")) {
1476 ///            if (a.type() == Attribute.ID)
1477                 if (typeName == TYPE_ID)
1478                     error("V-017", new Object[]{attName});

1479 ///            a.setIsFixed(true);
1480                 attributeUse = DTDEventListener.USE_FIXED;
1481                 whitespace("F-004");
1482                 parseLiteral(false);
1483 ///            if (a.type() != Attribute.CDATA)
1484 ///                a.setDefaultValue(normalize(false));
1485 ///            else
1486 ///                a.setDefaultValue(strTmp.toString());
1487 
1488                 if (typeName == TYPE_CDATA)
1489                     defaultValue = normalize(false);
1490                 else
1491                     defaultValue = strTmp.toString();

1492 
1493 // TODO: implement this check
1494 ///            if (a.type() != Attribute.CDATA)
1495 ///                validateAttributeSyntax (a, a.defaultValue());
1496             } else if (!peek("#IMPLIED")) {
1497                 attributeUse = DTDEventListener.USE_IMPLIED;
1498 
1499 ///            if (a.type() == Attribute.ID)
1500                 if (typeName == TYPE_ID)
1501                     error("V-018", new Object[]{attName});

1502                 parseLiteral(false);
1503 ///            if (a.type() != Attribute.CDATA)
1504 ///                a.setDefaultValue(normalize(false));
1505 ///            else
1506 ///                a.setDefaultValue(strTmp.toString());
1507                 if (typeName == TYPE_CDATA)
1508                     defaultValue = normalize(false);
1509                 else
1510                     defaultValue = strTmp.toString();

1511 
1512 // TODO: implement this check
1513 ///            if (a.type() != Attribute.CDATA)
1514 ///                validateAttributeSyntax (a, a.defaultValue());
1515             } else {
1516                 // TODO: this looks like an fatal error.
1517                 attributeUse = DTDEventListener.USE_NORMAL;
1518             }
1519 
1520             if (XmlLang.equals(attName)
1521                     && defaultValue/* a.defaultValue()*/ != null
1522                     && !isXmlLang(defaultValue/*a.defaultValue()*/))
1523                 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});

1524 
1525 // TODO: isn't it an error to specify the same attribute twice?
1526 ///        if (!element.attributes().contains(a)) {
1527 ///            element.addAttribute(a);
1528 ///            dtdHandler.attributeDecl(a);
1529 ///        }
1530 
1531             String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null;
1532             dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
1533             maybeWhitespace();
1534         }
1535         if (start != in)
1536             error("V-013", null);

1537         return true;
1538     }
1539 
1540     // used when parsing literal attribute values,
1541     // or public identifiers.
1542     //
1543     // input in strTmp
1544     private String normalize(boolean invalidIfNeeded) {
1545 
1546         // this can allocate an extra string...
1547 
1548         String s = strTmp.toString();
1549         String s2 = s.trim();
1550         boolean didStrip = false;
1551 
1552         if (s != s2) {
1553             s = s2;
1554             s2 = null;
1555             didStrip = true;
1556         }
1557         strTmp = new StringBuffer();
1558         for (int i = 0; i < s.length(); i++) {
1559             char c = s.charAt(i);
1560             if (!XmlChars.isSpace(c)) {
1561                 strTmp.append(c);
1562                 continue;
1563             }
1564             strTmp.append(' ');
1565             while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
1566                 didStrip = true;

1567             i--;
1568         }
1569         if (didStrip)
1570             return strTmp.toString();
1571         else
1572             return s;
1573     }

1574 
1575     private boolean maybeConditionalSect()
1576             throws IOException, SAXException {
1577 
1578         // [61] conditionalSect ::= includeSect | ignoreSect
1579 
1580         if (!peek("<!["))
1581             return false;

1582 
1583         String keyword;
1584         InputEntity start = in;
1585 
1586         maybeWhitespace();
1587 
1588         if ((keyword = maybeGetName()) == null)
1589             fatal("P-046");

1590         maybeWhitespace();
1591         nextChar('[', "F-030", null);
1592 
1593         // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
1594         //                extSubsetDecl ']]>'
1595         if ("INCLUDE".equals(keyword)) {
1596             for (; ;) {
1597                 while (in.isEOF() && in != start)
1598                     in = in.pop();

1599                 if (in.isEOF()) {
1600                     error("V-020", null);
1601                 }
1602                 if (peek("]]>"))
1603                     break;

1604 
1605                 doLexicalPE = false;
1606                 if (maybeWhitespace())
1607                     continue;
1608                 if (maybePEReference())

1609                     continue;

1610                 doLexicalPE = true;
1611                 if (maybeMarkupDecl() || maybeConditionalSect())
1612                     continue;

1613 
1614                 fatal("P-047");
1615             }
1616 
1617             // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
1618             //            ignoreSectcontents ']]>'
1619             // [64] ignoreSectcontents ::= Ignore ('<!['
1620             //            ignoreSectcontents ']]>' Ignore)*
1621             // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
1622         } else if ("IGNORE".equals(keyword)) {
1623             int nestlevel = 1;
1624             // ignoreSectcontents
1625             doLexicalPE = false;
1626             while (nestlevel > 0) {
1627                 char c = getc();    // will pop input entities
1628                 if (c == '<') {
1629                     if (peek("!["))
1630                         nestlevel++;

1631                 } else if (c == ']') {
1632                     if (peek("]>"))
1633                         nestlevel--;
1634                 } else

1635                     continue;
1636             }
1637         } else

1638             fatal("P-048", new Object[]{keyword});

1639         return true;
1640     }
1641 
1642 
1643     //
1644     // CHAPTER 4:  Physical Structures
1645     //
1646 
1647     // parse decimal or hex numeric character reference
1648     private int parseCharNumber()
1649             throws IOException, SAXException {
1650 
1651         char c;
1652         int retval = 0;
1653 
1654         // n.b. we ignore overflow ...
1655         if (getc() != 'x') {
1656             ungetc();
1657             for (; ;) {
1658                 c = getc();
1659                 if (c >= '0' && c <= '9') {
1660                     retval *= 10;
1661                     retval += (c - '0');
1662                     continue;
1663                 }
1664                 if (c == ';')
1665                     return retval;

1666                 fatal("P-049");
1667             }
1668         } else
1669             for (; ;) {
1670                 c = getc();
1671                 if (c >= '0' && c <= '9') {
1672                     retval <<= 4;
1673                     retval += (c - '0');
1674                     continue;
1675                 }
1676                 if (c >= 'a' && c <= 'f') {
1677                     retval <<= 4;
1678                     retval += 10 + (c - 'a');
1679                     continue;
1680                 }
1681                 if (c >= 'A' && c <= 'F') {
1682                     retval <<= 4;
1683                     retval += 10 + (c - 'A');
1684                     continue;
1685                 }
1686                 if (c == ';')
1687                     return retval;

1688                 fatal("P-050");
1689             }
1690     }

1691 
1692     // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
1693     // though still subject to the 'Char' construct in XML
1694     private int surrogatesToCharTmp(int ucs4)
1695             throws SAXException {
1696 
1697         if (ucs4 <= 0xffff) {
1698             if (XmlChars.isChar(ucs4)) {
1699                 charTmp[0] = (char) ucs4;
1700                 return 1;
1701             }
1702         } else if (ucs4 <= 0x0010ffff) {
1703             // we represent these as UNICODE surrogate pairs
1704             ucs4 -= 0x10000;
1705             charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
1706             charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
1707             return 2;
1708         }
1709         fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
1710         // NOTREACHED
1711         return -1;
1712     }
1713 
1714     private boolean maybePEReference()
1715             throws IOException, SAXException {
1716 
1717         // This is the SYNTACTIC version of this construct.
1718         // When processing external entities, there is also
1719         // a LEXICAL version; see getc() and doLexicalPE.
1720 
1721         // [69] PEReference ::= '%' Name ';'
1722         if (!in.peekc('%'))
1723             return false;

1724 
1725         String name = maybeGetName();
1726         Object entity;
1727 
1728         if (name == null)
1729             fatal("P-011");

1730         nextChar(';', "F-021", name);
1731         entity = params.get(name);
1732 
1733         if (entity instanceof InternalEntity) {
1734             InternalEntity value = (InternalEntity) entity;
1735             pushReader(value.buf, name, false);
1736 
1737         } else if (entity instanceof ExternalEntity) {
1738             pushReader((ExternalEntity) entity);
1739             externalParameterEntity((ExternalEntity) entity);
1740 
1741         } else if (entity == null) {
1742             error("V-022", new Object[]{name});
1743         }
1744         return true;
1745     }
1746 
    /**
     * Parses an entity declaration ("&lt;!ENTITY ...&gt;") if one starts at
     * the current input position.
     * <p>
     * The first declaration of a name wins: a later declaration of the same
     * name is parsed but not stored, and for general entities a warning
     * (P-054) is issued.  Stored general entities are reported to the
     * dtdHandler; parameter entities are stored in {@code params} only.
     *
     * @return false if no "!ENTITY" declaration starts here, true once a
     *         declaration has been consumed
     */
    private boolean maybeEntityDecl()
            throws IOException, SAXException {

        // [70] EntityDecl ::= GEDecl | PEDecl
        // [71] GEDecl ::= '<!ENTITY' S       Name S EntityDef S? '>'
        // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF     S? '>'
        // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
        // [74] PEDef     ::= EntityValue |  ExternalID
        //
        InputEntity start = peekDeclaration("!ENTITY");

        if (start == null)
            return false;

        String entityName;
        SimpleHashtable defns;      // table the new entity belongs in
        ExternalEntity externalId;
        boolean doStore;            // false when the name is already defined

        // PE expansion gets selectively turned off several places:
        // in ENTITY declarations (here), in comments, in PIs.

        // Here, we allow parameter entities to be declared, and allow
        // literals to include PE refs without the added spaces
        // required with their expansion in markup decls.

        doLexicalPE = false;
        whitespace("F-005");
        // a '%' after the whitespace selects the parameter-entity table
        if (in.peekc('%')) {
            whitespace("F-006");
            defns = params;
        } else
            defns = entities;

        ungetc();    // leave some whitespace
        doLexicalPE = true;
        entityName = getMarkupDeclname("F-017", false);
        whitespace("F-007");
        externalId = maybeExternalID();

        //
        // first definition sticks ... e.g. internal subset PEs are used
        // to override DTD defaults.  It's also an "error" to incorrectly
        // redefine builtin internal entities, but since reporting such
        // errors is optional we only give warnings ("just in case") for
        // non-parameter entities.
        //
        doStore = (defns.get(entityName) == null);
        if (!doStore && defns == entities)
            warning("P-054", new Object[]{entityName});

        // internal entities
        if (externalId == null) {
            char value [];
            InternalEntity entity;

            doLexicalPE = false;        // "ab%bar;cd" -maybe-> "abcd"
            parseLiteral(true);
            doLexicalPE = true;
            if (doStore) {
                // copy the parsed literal out of the shared strTmp buffer
                value = new char[strTmp.length()];
                if (value.length != 0)
                    strTmp.getChars(0, value.length, value, 0);
                entity = new InternalEntity(entityName, value);
                entity.isPE = (defns == params);
                entity.isFromInternalSubset = false;
                defns.put(entityName, entity);
                // only general entities are reported to the handler
                if (defns == entities)
                    dtdHandler.internalGeneralEntityDecl(entityName,
                            new String(value));
            }

            // external entities (including unparsed)
        } else {
            // [76] NDataDecl ::= S 'NDATA' S Name
            // (only looked for on general entities)
            if (defns == entities && maybeWhitespace()
                    && peek("NDATA")) {
                externalId.notation = getMarkupDeclname("F-018", false);

                // flag undeclared notation for checking after
                // the DTD is fully processed
                if (notations.get(externalId.notation) == null)
                    notations.put(externalId.notation, Boolean.TRUE);
            }
            externalId.name = entityName;
            externalId.isPE = (defns == params);
            externalId.isFromInternalSubset = false;
            if (doStore) {
                defns.put(entityName, externalId);
                // a notation marks an unparsed entity; otherwise only
                // general entities are reported to the handler
                if (externalId.notation != null)
                    dtdHandler.unparsedEntityDecl(entityName,
                            externalId.publicId, externalId.systemId,
                            externalId.notation);
                else if (defns == entities)
                    dtdHandler.externalGeneralEntityDecl(entityName,
                            externalId.publicId, externalId.systemId);
            }
        }
        maybeWhitespace();
        nextChar('>', "F-031", entityName);
        // the declaration must end in the input entity it started in
        if (start != in)
            error("V-013", null);
        return true;
    }
1851 
1852     private ExternalEntity maybeExternalID()
1853             throws IOException, SAXException {
1854 
1855         // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1856         //        | 'PUBLIC' S' PubidLiteral S Systemliteral
1857         String temp = null;
1858         ExternalEntity retval;
1859 
1860         if (peek("PUBLIC")) {
1861             whitespace("F-009");
1862             temp = parsePublicId();
1863         } else if (!peek("SYSTEM"))
1864             return null;

1865 
1866         retval = new ExternalEntity(in);
1867         retval.publicId = temp;
1868         whitespace("F-008");
1869         retval.systemId = parseSystemId();
1870         return retval;
1871     }
1872 
1873     private String parseSystemId()
1874             throws IOException, SAXException {
1875 
1876         String uri = getQuotedString("F-034", null);
1877         int temp = uri.indexOf(':');
1878 
1879         // resolve relative URIs ... must do it here since
1880         // it's relative to the source file holding the URI!
1881 
1882         // "new java.net.URL (URL, string)" conforms to RFC 1630,
1883         // but we can't use that except when the URI is a URL.
1884         // The entity resolver is allowed to handle URIs that are
1885         // not URLs, so we pass URIs through with scheme intact
1886         if (temp == -1 || uri.indexOf('/') < temp) {
1887             String baseURI;
1888 
1889             baseURI = in.getSystemId();
1890             if (baseURI == null)
1891                 fatal("P-055", new Object[]{uri});
1892             if (uri.length() == 0)

1893                 uri = ".";

1894             baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
1895             if (uri.charAt(0) != '/')
1896                 uri = baseURI + uri;
1897             else {
1898                 // XXX slashes at the beginning of a relative URI are
1899                 // a special case we don't handle.
1900                 throw new InternalError();
1901             }
1902 
1903             // letting other code map any "/xxx/../" or "/./" to "/",
1904             // since all URIs must handle it the same.
1905         }
1906         // check for fragment ID in URI
1907         if (uri.indexOf('#') != -1)
1908             error("P-056", new Object[]{uri});

1909         return uri;
1910     }
1911 
1912     private void maybeTextDecl()
1913             throws IOException, SAXException {
1914 
1915         // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1916         if (peek("<?xml")) {
1917             readVersion(false, "1.0");
1918             readEncoding(true);
1919             maybeWhitespace();
1920             if (!peek("?>"))
1921                 fatal("P-057");
1922         }
1923     }

1924 
    /**
     * Processes the content of an external parameter entity that is the
     * current input: an optional text declaration followed by markup
     * declarations, conditional sections, PE references and whitespace,
     * until that entity is exhausted.
     *
     * @param next the entity being processed; not read by this method,
     *             which works on the current input {@code in} instead
     *             (NOTE(review): callers are presumably expected to have
     *             pushed it already -- confirm)
     * @throws SAXException via fatal("P-059") if parsing stops before the
     *         entity reaches end of input
     */
    private void externalParameterEntity(ExternalEntity next)
            throws IOException, SAXException {

        //
        // Reap the intended benefits of standalone declarations:
        // don't deal with external parameter entities, except to
        // validate the standalone declaration.
        //

        // n.b. "in external parameter entities" (and external
        // DTD subset, same grammar) parameter references can
        // occur "within" markup declarations ... expansions can
        // cross syntax rules.  Flagged here; affects getc().

        // [79] ExtPE ::= TextDecl? extSubsetDecl
        // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
        //        | PEReference | S )*
        InputEntity pe;

        // XXX if this returns false ...

        // remember the entity we started in; the loop runs until it is
        // exhausted, even while nested entities are pushed and popped
        pe = in;
        maybeTextDecl();
        while (!pe.isEOF()) {
            // pop internal PEs (and whitespace before/after)
            if (in.isEOF()) {
                in = in.pop();
                continue;
            }
            // whitespace and PE references are handled syntactically ...
            doLexicalPE = false;
            if (maybeWhitespace())
                continue;
            if (maybePEReference())
                continue;
            // ... while declarations get lexical PE expansion in getc()
            doLexicalPE = true;
            if (maybeMarkupDecl() || maybeConditionalSect())
                continue;
            // nothing recognized: stop and let the check below report it
            break;
        }
        // if (in != pe) throw new InternalError("who popped my PE?");
        if (!pe.isEOF())
            fatal("P-059", new Object[]{in.getName()});
    }

1968 
1969     private void readEncoding(boolean must)
1970             throws IOException, SAXException {
1971 
1972         // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1973         String name = maybeReadAttribute("encoding", must);
1974 
1975         if (name == null)
1976             return;

1977         for (int i = 0; i < name.length(); i++) {
1978             char c = name.charAt(i);
1979             if ((c >= 'A' && c <= 'Z')
1980                     || (c >= 'a' && c <= 'z'))
1981                 continue;

1982             if (i != 0
1983                     && ((c >= '0' && c <= '9')
1984                     || c == '-'
1985                     || c == '_'
1986                     || c == '.'
1987                     ))
1988                 continue;
1989             fatal("P-060", new Object[]{new Character(c)});

1990         }
1991 
1992         //
1993         // This should be the encoding in use, and it's even an error for
1994         // it to be anything else (in certain cases that are impractical to
1995         // to test, and may even be insufficient).  So, we do the best we
1996         // can, and warn if things look suspicious.  Note that Java doesn't
1997         // uniformly expose the encodings, and that the names it uses
1998         // internally are nonstandard.  Also, that the XML spec allows
1999         // such "errors" not to be reported at all.
2000         //
2001         String currentEncoding = in.getEncoding();
2002 
2003         if (currentEncoding != null
2004                 && !name.equalsIgnoreCase(currentEncoding))
2005             warning("P-061", new Object[]{name, currentEncoding});
2006     }

2007 
2008     private boolean maybeNotationDecl()
2009             throws IOException, SAXException {
2010 
2011         // [82] NotationDecl ::= '<!NOTATION' S Name S
2012         //        (ExternalID | PublicID) S? '>'
2013         // [83] PublicID ::= 'PUBLIC' S PubidLiteral
2014         InputEntity start = peekDeclaration("!NOTATION");
2015 
2016         if (start == null)
2017             return false;

2018 
2019         String name = getMarkupDeclname("F-019", false);
2020         ExternalEntity entity = new ExternalEntity(in);
2021 
2022         whitespace("F-011");
2023         if (peek("PUBLIC")) {
2024             whitespace("F-009");
2025             entity.publicId = parsePublicId();
2026             if (maybeWhitespace()) {
2027                 if (!peek(">"))
2028                     entity.systemId = parseSystemId();
2029                 else
2030                     ungetc();
2031             }

2032         } else if (peek("SYSTEM")) {
2033             whitespace("F-008");
2034             entity.systemId = parseSystemId();
2035         } else
2036             fatal("P-062");

2037         maybeWhitespace();
2038         nextChar('>', "F-032", name);
2039         if (start != in)
2040             error("V-013", null);
2041         if (entity.systemId != null && entity.systemId.indexOf('#') != -1)

2042             error("P-056", new Object[]{entity.systemId});

2043 
2044         Object value = notations.get(name);
2045         if (value != null && value instanceof ExternalEntity)
2046             warning("P-063", new Object[]{name});
2047 
2048         else {
2049             notations.put(name, entity);
2050             dtdHandler.notationDecl(name, entity.publicId,
2051                     entity.systemId);
2052         }
2053         return true;
2054     }
2055 
2056 
2057     ////////////////////////////////////////////////////////////////
2058     //
2059     //    UTILITIES
2060     //
2061     ////////////////////////////////////////////////////////////////
2062 
2063     private char getc() throws IOException, SAXException {
2064 
2065         if (!doLexicalPE) {
2066             char c = in.getc();
2067             return c;
2068         }
2069 
2070         //
2071         // External parameter entities get funky processing of '%param;'
2072         // references.  It's not clearly defined in the XML spec; but it
2073         // boils down to having those refs be _lexical_ in most cases to
2074         // include partial syntax productions.  It also needs selective
2075         // enabling; "<!ENTITY % foo ...>" must work, for example, and
2076         // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
2077         // if it's expanded in a literal, else "ab  cd".  PEs also do
2078         // not expand within comments or PIs, and external PEs are only
2079         // allowed to have markup decls (and so aren't handled lexically).
2080         //
2081         // This PE handling should be merged into maybeWhitespace, where
2082         // it can be dealt with more consistently.
2083         //
2084         // Also, there are some validity constraints in this area.
2085         //
2086         char c;
2087 
2088         while (in.isEOF()) {
2089             if (in.isInternal() || (doLexicalPE && !in.isDocument()))
2090                 in = in.pop();
2091             else {
2092                 fatal("P-064", new Object[]{in.getName()});
2093             }
2094         }
2095         if ((c = in.getc()) == '%' && doLexicalPE) {
2096             // PE ref ::= '%' name ';'
2097             String name = maybeGetName();
2098             Object entity;
2099 
2100             if (name == null)
2101                 fatal("P-011");

2102             nextChar(';', "F-021", name);
2103             entity = params.get(name);
2104 
2105             // push a magic "entity" before and after the
2106             // real one, so ungetc() behaves uniformly
2107             pushReader(" ".toCharArray(), null, false);
2108             if (entity instanceof InternalEntity)
2109                 pushReader(((InternalEntity) entity).buf, name, false);
2110             else if (entity instanceof ExternalEntity)
2111             // PEs can't be unparsed!
2112             // XXX if this returns false ...

2113                 pushReader((ExternalEntity) entity);
2114             else if (entity == null)
2115             // see note in maybePEReference re making this be nonfatal.
2116                 fatal("V-022");
2117             else
2118                 throw new InternalError();

2119             pushReader(" ".toCharArray(), null, false);
2120             return in.getc();
2121         }
2122         return c;
2123     }
2124 
2125     private void ungetc() {
2126 
2127         in.ungetc();
2128     }
2129 
2130     private boolean peek(String s)
2131             throws IOException, SAXException {
2132 
2133         return in.peek(s, null);
2134     }
2135 
2136     // Return the entity starting the specified declaration
2137     // (for validating declaration nesting) else null.
2138 
2139     private InputEntity peekDeclaration(String s)
2140             throws IOException, SAXException {
2141 
2142         InputEntity start;
2143 
2144         if (!in.peekc('<'))
2145             return null;

2146         start = in;
2147         if (in.peek(s, null))
2148             return start;

2149         in.ungetc();
2150         return null;
2151     }
2152 
2153     private void nextChar(char c, String location, String near)
2154             throws IOException, SAXException {
2155 
2156         while (in.isEOF() && !in.isDocument())
2157             in = in.pop();
2158         if (!in.peekc(c))
2159             fatal("P-008", new Object[]
2160             {new Character(c),
2161              messages.getMessage(locale, location),
2162              (near == null ? "" : ('"' + near + '"'))});
2163     }

2164 
2165 
2166     private void pushReader(char buf [], String name, boolean isGeneral)
2167             throws SAXException {
2168 
2169         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2170         r.init(buf, name, in, !isGeneral);
2171         in = r;
2172     }
2173 
2174     private boolean pushReader(ExternalEntity next)
2175             throws IOException, SAXException {
2176 
2177         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2178         InputSource s;
2179         try {
2180             s = next.getInputSource(resolver);
2181         } catch (IOException e) {
2182             String msg =
2183                     "unable to open the external entity from :" + next.systemId;
2184             if (next.publicId != null)
2185                 msg += " (public id:" + next.publicId + ")";

2186 
2187             SAXParseException spe = new SAXParseException(msg,
2188                     getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
2189             dtdHandler.fatalError(spe);
2190             throw e;
2191         }
2192 
2193         r.init(s, next.name, in, next.isPE);
2194         in = r;
2195         return true;
2196     }
2197 
2198     public String getPublicId() {
2199 
2200         return (in == null) ? null : in.getPublicId();
2201     }
2202 
2203     public String getSystemId() {
2204 
2205         return (in == null) ? null : in.getSystemId();
2206     }
2207 
2208     public int getLineNumber() {
2209 
2210         return (in == null) ? -1 : in.getLineNumber();
2211     }
2212 
2213     public int getColumnNumber() {
2214 
2215         return (in == null) ? -1 : in.getColumnNumber();
2216     }
2217 
2218     // error handling convenience routines
2219 
2220     private void warning(String messageId, Object parameters [])
2221             throws SAXException {
2222 
2223         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2224                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2225 
2226         dtdHandler.warning(e);
2227     }
2228 
2229     void error(String messageId, Object parameters [])
2230             throws SAXException {
2231 
2232         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2233                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2234 
2235         dtdHandler.error(e);
2236     }
2237 
2238     private void fatal(String messageId) throws SAXException {
2239 
2240         fatal(messageId, null);
2241     }
2242 
2243     private void fatal(String messageId, Object parameters [])
2244             throws SAXException {
2245 
2246         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2247                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2248 
2249         dtdHandler.fatalError(e);
2250 
2251         throw e;
2252     }
2253 
2254     //
2255     // Map char arrays to strings ... cuts down both on memory and
2256     // CPU usage for element/attribute/other names that are reused.
2257     //
2258     // Documents typically repeat names a lot, so we more or less
2259     // intern all the strings within the document; since some strings
2260     // are repeated in multiple documents (e.g. stylesheets) we go
2261     // a bit further, and intern globally.
2262     //
2263     static class NameCache {
2264         //
2265         // Unless we auto-grow this, the default size should be a
2266         // reasonable bit larger than needed for most XML files
2267         // we've yet seen (and be prime).  If it's too small, the
2268         // penalty is just excess cache collisions.
2269         //
2270         NameCacheEntry hashtable [] = new NameCacheEntry[541];

2271 
2272         //
2273         // Usually we just want to get the 'symbol' for these chars
2274         //
2275         String lookup(char value [], int len) {
2276 
2277             return lookupEntry(value, len).name;
2278         }
2279 
2280         //
2281         // Sometimes we need to scan the chars in the resulting
2282         // string, so there's an accessor which exposes them.
2283         // (Mostly for element end tags.)
2284         //
2285         NameCacheEntry lookupEntry(char value [], int len) {
2286 
2287             int index = 0;
2288             NameCacheEntry entry;
2289 
2290             // hashing to get index
2291             for (int i = 0; i < len; i++)
2292                 index = index * 31 + value[i];

2293             index &= 0x7fffffff;
2294             index %= hashtable.length;
2295 
2296             // return entry if one's there ...
2297             for (entry = hashtable[index];
2298                  entry != null;
2299                  entry = entry.next) {
2300                 if (entry.matches(value, len))
2301                     return entry;
2302             }

2303 
2304             // else create new one
2305             entry = new NameCacheEntry();
2306             entry.chars = new char[len];
2307             System.arraycopy(value, 0, entry.chars, 0, len);
2308             entry.name = new String(entry.chars);
2309             //
2310             // NOTE:  JDK 1.1 has a fixed size string intern table,
2311             // with non-GC'd entries.  It can panic here; that's a
2312             // JDK problem, use 1.2 or later with many identifiers.
2313             //
2314             entry.name = entry.name.intern();        // "global" intern
2315             entry.next = hashtable[index];
2316             hashtable[index] = entry;
2317             return entry;
2318         }
2319     }
2320 
2321     static class NameCacheEntry {
2322 
2323         String name;
2324         char chars [];
2325         NameCacheEntry next;
2326 
2327         boolean matches(char value [], int len) {
2328 
2329             if (chars.length != len)
2330                 return false;
2331             for (int i = 0; i < len; i++)
2332                 if (value[i] != chars[i])

2333                     return false;


2334             return true;
2335         }
2336     }
2337 
2338     //
2339     // Message catalog for diagnostics.
2340     //
2341     static final Catalog messages = new Catalog();
2342 
2343     static final class Catalog extends MessageCatalog {
2344 
2345         Catalog() {
2346             super(DTDParser.class);
2347         }
2348     }
2349 
2350 }
   1 /*
   2  * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.dtdparser;
  27 
  28 import org.xml.sax.EntityResolver;
  29 import org.xml.sax.InputSource;
  30 import org.xml.sax.Locator;
  31 import org.xml.sax.SAXException;
  32 import org.xml.sax.SAXParseException;
  33 
  34 import java.io.IOException;
  35 import java.util.ArrayList;
  36 import java.util.Enumeration;
  37 import java.util.Hashtable;
  38 import java.util.Locale;
  39 import java.util.Set;
  40 import java.util.Vector;
  41 import java.util.logging.Level;
  42 import java.util.logging.Logger;
  43 
  44 /**
  45  * This implements parsing of XML 1.0 DTDs.
  46  * <p>
  47  * This conforms to the portion of the XML 1.0 specification related to the
  48  * external DTD subset.
  49  * <p>
  50  * For multi-language applications (such as web servers using XML processing to
  51  * create dynamic content), a method supports choosing a locale for parser
  52  * diagnostics which is both understood by the message recipient and supported
  53  * by the parser.
  54  * <p>
  55  * This parser produces a stream of parse events. It supports some features
  56  * (exposing comments, CDATA sections, and entity references) which are not
  57  * required to be reported by conformant XML processors.
  58  *
  59  * @author David Brownell
  60  * @author Janet Koenig
  61  * @author Kohsuke KAWAGUCHI
  62  * @version $Id: DTDParser.java,v 1.2 2009-04-16 15:25:49 snajper Exp $
  63  */
  64 public class DTDParser {
  65 
    // Public string constants whose values are type keywords.
    // NOTE(review): presumably the attribute type names reported for
    // attribute declarations -- confirm against the code that uses them.
    public final static String TYPE_CDATA = "CDATA";
    public final static String TYPE_ID = "ID";
    public final static String TYPE_IDREF = "IDREF";
    public final static String TYPE_IDREFS = "IDREFS";
    public final static String TYPE_ENTITY = "ENTITY";
    public final static String TYPE_ENTITIES = "ENTITIES";
    public final static String TYPE_NMTOKEN = "NMTOKEN";
    public final static String TYPE_NMTOKENS = "NMTOKENS";
    public final static String TYPE_NOTATION = "NOTATION";
    public final static String TYPE_ENUMERATION = "ENUMERATION";

    // stack of input entities being merged; null when no input is active
    private InputEntity in;

    // temporaries reused during parsing (strTmp, nameTmp and nameCache are
    // allocated by init())
    private StringBuffer strTmp;
    private char nameTmp[];
    private NameCache nameCache;
    private char charTmp[] = new char[2];

    // temporary DTD parsing state: when set, getc() expands '%name;'
    // parameter-entity references lexically
    private boolean doLexicalPE;

    // DTD state, used during parsing
//    private SimpleHashtable    elements = new SimpleHashtable (47);
    protected final Set declaredElements = new java.util.HashSet();
    // declared parameter entities, keyed by name
    private SimpleHashtable params = new SimpleHashtable(7);

    // exposed to package-private subclass
    // declared notations keyed by name, and declared general entities
    // keyed by name
    Hashtable notations = new Hashtable(7);
    SimpleHashtable entities = new SimpleHashtable(17);

    private SimpleHashtable ids = new SimpleHashtable();

    // listeners for DTD parsing events
    private DTDEventListener dtdHandler;

    // resolver for external entities, and locale for diagnostics;
    // init() fills in defaults when these are still null
    private EntityResolver resolver;
    private Locale locale;

    // string constants -- use these copies so "==" works
    // package private
    static final String strANY = "ANY";
    static final String strEMPTY = "EMPTY";

    // java.util.logging logger named after this class
    private static final Logger LOGGER = Logger.getLogger(DTDParser.class.getName());
 103 
 104     /**
 105      * Used by applications to request locale for diagnostics.
 106      *
 107      * @param l The locale to use, or null to use system defaults (which may
 108      * include only message IDs).
 109      */
 110     public void setLocale(Locale l) throws SAXException {
 111 
 112         if (l != null && !messages.isLocaleSupported(l.toString())) {
 113             throw new SAXException(messages.getMessage(locale,
 114                     "P-078", new Object[]{l}));
 115         }
 116         locale = l;
 117     }
 118 
 119     /**
 120      * Returns the diagnostic locale.
 121      */
 122     public Locale getLocale() {
 123         return locale;
 124     }
 125 
 126     /**
 127      * Chooses a client locale to use for diagnostics, using the first language
 128      * specified in the list that is supported by this parser. That locale is
 129      * then set using <a href="#setLocale(java.util.Locale)"> setLocale()</a>.
 130      * Such a list could be provided by a variety of user preference mechanisms,
 131      * including the HTTP <em>Accept-Language</em> header field.

 132      *
 133      * @param languages Array of language specifiers, ordered with the most
 134      * preferable one at the front. For example, "en-ca" then "fr-ca", followed
 135      * by "zh_CN". Both RFC 1766 and Java styles are supported.
 136      * @return The chosen locale, or null.
 137      * @see MessageCatalog
 138      */
 139     public Locale chooseLocale(String languages[])
 140             throws SAXException {
 141 
 142         Locale l = messages.chooseLocale(languages);
 143 
 144         if (l != null) {
 145             setLocale(l);
 146         }
 147         return l;
 148     }
 149 
 150     /**
 151      * Lets applications control entity resolution.
 152      */
 153     public void setEntityResolver(EntityResolver r) {
 154 
 155         resolver = r;
 156     }
 157 
 158     /**
 159      * Returns the object used to resolve entities
 160      */
 161     public EntityResolver getEntityResolver() {
 162 
 163         return resolver;
 164     }
 165 
 166     /**
 167      * Used by applications to set handling of DTD parsing events.
 168      */
 169     public void setDtdHandler(DTDEventListener handler) {
 170         dtdHandler = handler;
 171         if (handler != null) {
 172             handler.setDocumentLocator(new Locator() {
 173                 @Override
 174                 public String getPublicId() {
 175                     return DTDParser.this.getPublicId();
 176                 }
 177 
 178                 @Override
 179                 public String getSystemId() {
 180                     return DTDParser.this.getSystemId();
 181                 }
 182 
 183                 @Override
 184                 public int getLineNumber() {
 185                     return DTDParser.this.getLineNumber();
 186                 }
 187 
 188                 @Override
 189                 public int getColumnNumber() {
 190                     return DTDParser.this.getColumnNumber();
 191                 }
 192             });
 193         }
 194     }
 195 
 196     /**
 197      * Returns the handler used to for DTD parsing events.
 198      */
 199     public DTDEventListener getDtdHandler() {
 200         return dtdHandler;
 201     }
 202 
 203     /**
 204      * Parse a DTD.
 205      */
 206     public void parse(InputSource in)
 207             throws IOException, SAXException {
 208         init();
 209         parseInternal(in);
 210     }
 211 
 212     /**
 213      * Parse a DTD.
 214      */
 215     public void parse(String uri)
 216             throws IOException, SAXException {
 217         InputSource inSource;
 218 
 219         init();
 220         // System.out.println ("parse (\"" + uri + "\")");
 221         inSource = resolver.resolveEntity(null, uri);
 222 
 223         // If custom resolver punts resolution to parser, handle it ...
 224         if (inSource == null) {
 225             inSource = Resolver.createInputSource(new java.net.URL(uri), false);
 226 
 227             // ... or if custom resolver doesn't correctly construct the
 228             // input entity, patch it up enough so relative URIs work, and
 229             // issue a warning to minimize later confusion.
 230         } else if (inSource.getSystemId() == null) {
 231             warning("P-065", null);
 232             inSource.setSystemId(uri);
 233         }
 234 
 235         parseInternal(inSource);
 236     }
 237 
 238     // makes sure the parser is reset to "before a document"
 239     private void init() {
 240         in = null;
 241 
 242         // alloc temporary data used in parsing
 243         strTmp = new StringBuffer();
              // initial capacity only; nameCharString() grows this buffer on demand
 244         nameTmp = new char[20];
 245         nameCache = new NameCache();
 246 
 247         // reset doc info
 248 //        isInAttribute = false;
 249 
 250         doLexicalPE = false;
 251 
 252         entities.clear();
 253         notations.clear();
 254         params.clear();
 255         //    elements.clear ();
 256         declaredElements.clear();
 257 
 258         // initialize predefined references ... re-interpreted later
 259         builtin("amp", "&");
 260         builtin("lt", "<");
 261         builtin("gt", ">");
 262         builtin("quot", "\"");
 263         builtin("apos", "'");
 264 
              // fall back to defaults for anything that has not been configured;
              // values that are already set survive a reset
 265         if (locale == null) {
 266             locale = Locale.getDefault();
 267         }
 268         if (resolver == null) {
 269             resolver = new Resolver();
 270         }
 271         if (dtdHandler == null) {
 272             dtdHandler = new DTDHandlerBase();
 273         }
 274     }
 275 
 276     private void builtin(String entityName, String entityValue) {
 277         InternalEntity entity;
 278         entity = new InternalEntity(entityName, entityValue.toCharArray());
 279         entities.put(entityName, entity);
 280     }
 281 

 282     ////////////////////////////////////////////////////////////////
 283     //
 284     // parsing is by recursive descent, code roughly
 285     // following the BNF rules except tweaked for simple
 286     // lookahead.  rules are more or less in numeric order,
 287     // except where code sharing suggests other structures.
 288     //
 289     // a classic benefit of recursive descent parsers:  it's
 290     // relatively easy to get diagnostics that make sense.
 291     //
 292     ////////////////////////////////////////////////////////////////
 293     @SuppressWarnings("CallToThreadDumpStack")

 294     private void parseInternal(InputSource input)
 295             throws IOException, SAXException {
 296 
 297         if (input == null) {
 298             fatal("P-000");
 299         }
 300 
 301         try {
 302             in = InputEntity.getInputEntity(dtdHandler, locale);
 303             in.init(input, null, null, false);
 304 
 305             dtdHandler.startDTD(in);
 306 
 307             // [30] extSubset ::= TextDecl? extSubsetDecl
 308             // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
 309             //        | PEReference | S )*
 310             //    ... same as [79] extPE, which is where the code is
 311 
 312             ExternalEntity externalSubset = new ExternalEntity(in);
 313             externalParameterEntity(externalSubset);
 314 
 315             if (!in.isEOF()) {
 316                 fatal("P-001", new Object[]{Integer.toHexString(((int) getc()))});

 317             }
 318             afterRoot();
 319             dtdHandler.endDTD();
 320 
 321         } catch (EndOfInputException e) {
 322             if (!in.isDocument()) {
 323                 String name = in.getName();
 324                 do {    // force a relevant URI and line number
 325                     in = in.pop();
 326                 } while (in.isInternal());
 327                 fatal("P-002", new Object[]{name});
 328             } else {
 329                 fatal("P-003", null);
 330             }
 331         } catch (RuntimeException e) {
 332             LOGGER.log(Level.SEVERE, "Internal DTD parser error.", e);



 333             throw new SAXParseException(e.getMessage() != null
 334                     ? e.getMessage() : e.getClass().getName(),
 335                     getPublicId(), getSystemId(),
 336                     getLineNumber(), getColumnNumber());
 337 
 338         } finally {
 339             // recycle temporary data used during parsing
 340             strTmp = null;
 341             nameTmp = null;
 342             nameCache = null;
 343 
 344             // ditto input sources etc
 345             if (in != null) {
 346                 in.close();
 347                 in = null;
 348             }
 349 
 350             // get rid of all DTD info ... some of it would be
 351             // useful for editors etc, investigate later.
 352 
 353             params.clear();
 354             entities.clear();
 355             notations.clear();
 356             declaredElements.clear();
 357 //        elements.clear();
 358             ids.clear();
 359         }
 360     }
 361 
 362     void afterRoot() throws SAXException {
 363         // Make sure all IDREFs match declared ID attributes.  We scan
 364         // after the document element is parsed, since XML allows forward
 365         // references, and only now can we know if they're all resolved.
 366 
 367         for (Enumeration e = ids.keys();
 368                 e.hasMoreElements();) {

 369             String id = (String) e.nextElement();
 370             Boolean value = (Boolean) ids.get(id);
 371             if (Boolean.FALSE.equals(value)) {
 372                 error("V-024", new Object[]{id});
 373             }
 374         }
 375     }
 376 
 377     // role is for diagnostics
 378     private void whitespace(String roleId)
 379             throws IOException, SAXException {
 380 
 381         // [3] S ::= (#x20 | #x9 | #xd | #xa)+
 382         if (!maybeWhitespace()) {
 383             fatal("P-004", new Object[]{messages.getMessage(locale, roleId)});

 384         }
 385     }
 386 
 387     // S?
          // Consumes a run of whitespace, if there is one, and returns whether at
          // least one whitespace character was seen.  With doLexicalPE off the
          // work is delegated to the current input entity; otherwise characters
          // are read one at a time through getc().
 388     private boolean maybeWhitespace()
 389             throws IOException, SAXException {
 390 
 391         if (!doLexicalPE) {
 392             return in.maybeWhitespace();
 393         }
 394 
 395         // see getc() for the PE logic -- this lets us splice
 396         // expansions of PEs in "anywhere".  getc() has smarts,
 397         // so for external PEs we don't bypass it.
 398 
 399         // XXX we can marginally speed PE handling, and certainly
 400         // be cleaner (hence potentially more correct), by using
 401         // the observations that expanded PEs only start and stop
 402         // where whitespace is allowed.  getc wouldn't need any
 403         // "lexical" PE expansion logic, and no other method needs
 404         // to handle termination of PEs.  (parsing of literals would
 405         // still need to pop entities, but not parsing of references
 406         // in content.)
 407 
 408         char c = getc();
 409         boolean saw = false;
 410 
 411         while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
 412             saw = true;
 413 
 414             // this gracefully ends things when we stop playing
 415             // with internal parameters.  caller should have a
 416             // grammar rule allowing whitespace at end of entity.
 417             if (in.isEOF() && !in.isInternal()) {
 418                 return saw;
 419             }
 420             c = getc();
 421         }
              // the loop ended on a non-whitespace character; ungetc() presumably
              // makes it available to the next read -- confirm against ungetc()
 422         ungetc();
 423         return saw;
 424     }
 425 
 426     private String maybeGetName()
 427             throws IOException, SAXException {
 428 
 429         NameCacheEntry entry = maybeGetNameCacheEntry();
 430         return (entry == null) ? null : entry.name;
 431     }
 432 
 433     private NameCacheEntry maybeGetNameCacheEntry()
 434             throws IOException, SAXException {
 435 
 436         // [5] Name ::= (Letter|'_'|':') (Namechar)*
 437         char c = getc();
 438 
 439         if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
 440             ungetc();
 441             return null;
 442         }
 443         return nameCharString(c);
 444     }
 445 
 446     // Used when parsing enumerations
 447     private String getNmtoken()
 448             throws IOException, SAXException {
 449 
 450         // [7] Nmtoken ::= (Namechar)+
 451         char c = getc();
 452         if (!XmlChars.isNameChar(c)) {
 453             fatal("P-006", new Object[]{Character.valueOf(c)});
 454         }
 455         return nameCharString(c).name;
 456     }
 457 
 458     // n.b. this gets used when parsing attribute values (for
 459     // internal references) so we can't use strTmp; it's also
 460     // a hotspot for CPU and memory in the parser (called at least
 461     // once for each element) so this has been optimized a bit.

 462     private NameCacheEntry nameCharString(char c)
 463             throws IOException, SAXException {
 464 
 465         int i = 1;
 466 
 467         nameTmp[0] = c;
 468         for (;;) {
 469             if ((c = in.getNameChar()) == 0) {
 470                 break;
 471             }
 472             if (i >= nameTmp.length) {
 473                 char tmp[] = new char[nameTmp.length + 10];
 474                 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
 475                 nameTmp = tmp;
 476             }
 477             nameTmp[i++] = c;
 478         }
 479         return nameCache.lookupEntry(nameTmp, i);
 480     }
 481 
 482     //
 483     // much similarity between parsing entity values in DTD
 484     // and attribute values (in DTD or content) ... both follow
 485     // literal parsing rules, newline canonicalization, etc
 486     //
 487     // leaves value in 'strTmp' ... either a "replacement text" (4.5),
 488     // or else partially normalized attribute value (the first bit
 489     // of 3.3.3's spec, without the "if not CDATA" bits).
 490     //
          // isEntityValue selects the grammar: true for [9] EntityValue (general
          // references are kept as text, parameter entity references are
          // expanded), false for [10] AttValue (general references are expanded,
          // whitespace becomes a space, '<' is rejected).
 491     @SuppressWarnings("UnusedAssignment")
 492     private void parseLiteral(boolean isEntityValue)
 493             throws IOException, SAXException {
 494 
 495         // [9] EntityValue ::=
 496         //    '"' ([^"&%] | Reference | PEReference)* '"'
 497         //    |    "'" ([^'&%] | Reference | PEReference)* "'"
 498         // [10] AttValue ::=
 499         //    '"' ([^"&]  | Reference             )* '"'
 500         //    |    "'" ([^'&]  | Reference             )* "'"
 501         char quote = getc();
 502         char c;
              // remember the entity the literal started in: only a quote read
              // from this same entity may close the literal
 503         InputEntity source = in;
 504 
 505         if (quote != '\'' && quote != '"') {
 506             fatal("P-007");
 507         }
 508 
 509         // don't report entity expansions within attributes,
 510         // they're reported "fully expanded" via SAX
 511 //    isInAttribute = !isEntityValue;
 512 
 513         // get value into strTmp
 514         strTmp = new StringBuffer();
 515 
 516         // scan, allowing entity push/pop wherever ...
 517         // expanded entities can't terminate the literal!
 518         for (;;) {
 519             if (in != source && in.isEOF()) {
 520                 // we don't report end of parsed entities
 521                 // within attributes (no SAX hooks)
 522                 in = in.pop();
 523                 continue;
 524             }
 525             if ((c = getc()) == quote && in == source) {
 526                 break;
 527             }
 528 
 529             //
 530             // Basically the "reference in attribute value"
 531             // row of the chart in section 4.4 of the spec
 532             //
 533             if (c == '&') {
 534                 String entityName = maybeGetName();
 535 
 536                 if (entityName != null) {
 537                     nextChar(';', "F-020", entityName);
 538 
 539                     // 4.4 says:  bypass these here ... we'll catch
 540                     // forbidden refs to unparsed entities on use
 541                     if (isEntityValue) {
 542                         strTmp.append('&');
 543                         strTmp.append(entityName);
 544                         strTmp.append(';');
 545                         continue;
 546                     }
                          // isEntityValue is always false when this line is
                          // reached, because the branch above continues the loop
 547                     expandEntityInLiteral(entityName, entities, isEntityValue);
 548 

 549                     // character references are always included immediately
                          // (note that the getc() in the test below consumes the
                          // character following '&' even when it is not '#')
 550                 } else if ((getc()) == '#') {
 551                     int tmp = parseCharNumber();
 552 
                          // values above 0xffff do not fit in one char; charTmp is
                          // filled by surrogatesToCharTmp(), whose result presumably
                          // is the number of chars it produced -- confirm there
 553                     if (tmp > 0xffff) {
 554                         tmp = surrogatesToCharTmp(tmp);
 555                         strTmp.append(charTmp[0]);
 556                         if (tmp == 2) {
 557                             strTmp.append(charTmp[1]);
 558                         }
 559                     } else {
 560                         strTmp.append((char) tmp);
 561                     }
 562                 } else {
 563                     fatal("P-009");
 564                 }
 565                 continue;
 566 
 567             }
 568 
 569             // expand parameter entities only within entity value literals
 570             if (c == '%' && isEntityValue) {
 571                 String entityName = maybeGetName();
 572 
 573                 if (entityName != null) {
 574                     nextChar(';', "F-021", entityName);
 575                     expandEntityInLiteral(entityName, params, isEntityValue);
 576                     continue;
 577                 } else {
 578                     fatal("P-011");
 579                 }
 580             }
 581 
 582             // For attribute values ...
 583             if (!isEntityValue) {
 584                 // 3.3.3 says whitespace normalizes to space...
 585                 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
 586                     strTmp.append(' ');
 587                     continue;
 588                 }
 589 
 590                 // "<" not legal in parsed literals ...
 591                 if (c == '<') {
 592                     fatal("P-012");
 593                 }
 594             }
 595 
                  // ordinary character: copy it through unchanged
 596             strTmp.append(c);
 597         }
 598 //    isInAttribute = false;
 599     }
 600 
 601     // does a SINGLE expansion of the entity (often reparsed later)
 602     private void expandEntityInLiteral(String name, SimpleHashtable table,
 603             boolean isEntityValue)
 604             throws IOException, SAXException {
 605 
 606         Object entity = table.get(name);
 607 
 608         if (entity instanceof InternalEntity) {
 609             InternalEntity value = (InternalEntity) entity;
 610             pushReader(value.buf, name, !value.isPE);
 611 
 612         } else if (entity instanceof ExternalEntity) {
 613             if (!isEntityValue) // must be a PE ...
 614             {
 615                 fatal("P-013", new Object[]{name});
 616             }
 617             // XXX if this returns false ...
 618             pushReader((ExternalEntity) entity);
 619 
 620         } else if (entity == null) {
 621             //
 622             // Note:  much confusion about whether spec requires such
 623             // errors to be fatal in many cases, but none about whether
 624             // it allows "normal" errors to be unrecoverable!
 625             //
 626             fatal((table == params) ? "V-022" : "P-014",
 627                     new Object[]{name});
 628         }
 629     }
 630 
 631     // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
 632     // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'

 633     // NOTE:  XML spec should explicitly say that PE ref syntax is
 634     // ignored in PIs, comments, SystemLiterals, and Pubid Literal
 635     // values ... can't process the XML spec's own DTD without doing
 636     // that for comments.

 637     private String getQuotedString(String type, String extra)
 638             throws IOException, SAXException {
 639 
 640         // use in.getc to bypass PE processing
 641         char quote = in.getc();
 642 
 643         if (quote != '\'' && quote != '"') {
 644             fatal("P-015", new Object[]{
 645                         messages.getMessage(locale, type, new Object[]{extra})
 646                     });
 647         }
 648 
 649         char c;
 650 
 651         strTmp = new StringBuffer();
 652         while ((c = in.getc()) != quote) {
 653             strTmp.append((char) c);
 654         }
 655         return strTmp.toString();
 656     }
 657 

 658     private String parsePublicId() throws IOException, SAXException {
 659 
 660         // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
 661         // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
 662         String retval = getQuotedString("F-033", null);
 663         for (int i = 0; i < retval.length(); i++) {
 664             char c = retval.charAt(i);
 665             if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
 666                     && !(c >= 'A' && c <= 'Z')
 667                     && !(c >= 'a' && c <= 'z')) {
 668                 fatal("P-016", new Object[]{Character.valueOf(c)});
 669             }
 670         }
 671         strTmp = new StringBuffer();
 672         strTmp.append(retval);
 673         return normalize(false);
 674     }
 675 
 676     // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 677     // handled by:  InputEntity.parsedContent()

 678     private boolean maybeComment(boolean skipStart)
 679             throws IOException, SAXException {
              // Skips one comment if the input is positioned at one and returns
              // true; returns false when the comment opener is not matched.
              // skipStart selects which opener is looked for: "!--" when true,
              // "<!--" when false.
 680 
 681         // [15] Comment ::= '<!--'
 682         //        ( (Char - '-') | ('-' (Char - '-'))*
 683         //        '-->'
 684         if (!in.peek(skipStart ? "!--" : "<!--", null)) {
 685             return false;
 686         }
 687 
 688         boolean savedLexicalPE = doLexicalPE;
 689         boolean saveCommentText;
 690 
 691         doLexicalPE = false;
              // hard-wired off: the strTmp capture branches below and the final
              // dtdHandler.comment() call therefore never run as written
 692         saveCommentText = false;
 693         if (saveCommentText) {
 694             strTmp = new StringBuffer();
 695         }
 696 
 697         oneComment:
 698         for (;;) {
 699             try {
 700                 // bypass PE expansion, but permit PEs
 701                 // to complete ... valid docs won't care.
 702                 for (;;) {
 703                     int c = getc();
 704                     if (c == '-') {
 705                         c = getc();
 706                         if (c != '-') {
 707                             if (saveCommentText) {
 708                                 strTmp.append('-');
 709                             }
 710                             ungetc();
 711                             continue;
 712                         }
                              // "--" seen: the only thing allowed next is '>'
 713                         nextChar('>', "F-022", null);
 714                         break oneComment;
 715                     }
 716                     if (saveCommentText) {
 717                         strTmp.append((char) c);
 718                     }
 719                 }
 720             } catch (EndOfInputException e) {
 721                 //
 722                 // This is fatal EXCEPT when we're processing a PE...
 723                 // in which case a validating processor reports an error.
 724                 // External PEs are easy to detect; internal ones we
 725                 // infer by being an internal entity outside an element.
 726                 //
 727                 if (in.isInternal()) {
 728                     error("V-021", null);
 729                 }
                      // NOTE(review): if fatal() ever returned normally, the
                      // enclosing for(;;) would resume scanning -- confirm that
                      // fatal() always throws
 730                 fatal("P-017");
 731             }
 732         }
              // only reached on the normal exit above; the saved flag is not
              // restored when an exception leaves this method
 733         doLexicalPE = savedLexicalPE;
 734         if (saveCommentText) {
 735             dtdHandler.comment(strTmp.toString());
 736         }
 737         return true;
 738     }
 739 
 740     private boolean maybePI(boolean skipStart)
 741             throws IOException, SAXException {
              // Parses one processing instruction if the input is positioned at
              // one, reports it through dtdHandler.processingInstruction(), and
              // returns true; returns false when the PI opener is not matched.
              // skipStart selects which opener is looked for: "?" when true,
              // "<?" when false.
 742 
 743         // [16] PI ::= '<?' PITarget
 744         //        (S (Char* - (Char* '?>' Char*)))?
 745         //        '?>'
 746         // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
 747         boolean savedLexicalPE = doLexicalPE;
 748 
 749         if (!in.peek(skipStart ? "?" : "<?", null)) {
 750             return false;
 751         }
 752         doLexicalPE = false;
 753 
 754         String target = maybeGetName();
 755 
 756         if (target == null) {
 757             fatal("P-018");
 758         }
              // exactly "xml" and any other casing of it get different messages
 759         if ("xml".equals(target)) {
 760             fatal("P-019");
 761         }
 762         if ("xml".equalsIgnoreCase(target)) {
 763             fatal("P-020", new Object[]{target});
 764         }
 765 
 766         if (maybeWhitespace()) {
                  // whitespace after the target: collect the PI data into strTmp
 767             strTmp = new StringBuffer();
 768             try {
 769                 for (;;) {
 770                     // use in.getc to bypass PE processing
 771                     char c = in.getc();
 772                     //Reached the end of PI.
 773                     if (c == '?' && in.peekc('>')) {
 774                         break;
 775                     }
 776                     strTmp.append(c);
 777                 }
 778             } catch (EndOfInputException e) {
 779                 fatal("P-021");
 780             }
 781             dtdHandler.processingInstruction(target, strTmp.toString());
 782         } else {
                  // no whitespace: the PI must end right here and has empty data
 783             if (!in.peek("?>", null)) {
 784                 fatal("P-022");
 785             }
 786             dtdHandler.processingInstruction(target, "");
 787         }
 788 
 789         doLexicalPE = savedLexicalPE;
 790         return true;
 791     }
 792 
 793     // [18] CDSect ::= CDStart CData CDEnd
 794     // [19] CDStart ::= '<![CDATA['
 795     // [20] CData ::= (Char* - (Char* ']]>' Char*))
 796     // [21] CDEnd ::= ']]>'
 797     //
 798     //    ... handled by InputEntity.unparsedContent()

 799     // collapsing several rules together ...
 800     // simpler than attribute literals -- no reference parsing!
          //
          // Reads "S name Eq quoted-value" and returns the quoted value.  When
          // 'must' is false and the attribute is absent, null is returned; when
          // 'must' is true its absence is reported as "P-024".
 801     private String maybeReadAttribute(String name, boolean must)
 802             throws IOException, SAXException {
 803 
 804         // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
 805         // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
 806         // [32] SDDecl ::=  S 'standalone' Eq \'|\" ... \'|\"
 807         if (!maybeWhitespace()) {
 808             if (!must) {
 809                 return null;
 810             }
 811             fatal("P-024", new Object[]{name});
 812             // NOTREACHED
 813         }
 814 
 815         if (!peek(name)) {
 816             if (must) {
 817                 fatal("P-024", new Object[]{name});
 818             } else {


                      // presumably un-reads the whitespace consumed above so that
                      // a check for the next attribute still sees it -- confirm
 822                 ungetc();
 823                 return null;
 824             }
 825         }
 826 
 827         // [25] Eq ::= S? '=' S?
 828         maybeWhitespace();
 829         nextChar('=', "F-023", null);
 830         maybeWhitespace();
 831 
 832         return getQuotedString("F-035", name);
 833     }
 834 
 835     private void readVersion(boolean must, String versionNum)
 836             throws IOException, SAXException {
 837 
 838         String value = maybeReadAttribute("version", must);
 839 
 840         // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
 841 
 842         if (must && value == null) {
 843             fatal("P-025", new Object[]{versionNum});
 844         }
 845         if (value != null) {
 846             int length = value.length();
 847             for (int i = 0; i < length; i++) {
 848                 char c = value.charAt(i);
 849                 if (!((c >= '0' && c <= '9')
 850                         || c == '_' || c == '.'
 851                         || (c >= 'a' && c <= 'z')
 852                         || (c >= 'A' && c <= 'Z')
 853                         || c == ':' || c == '-')) {

 854                     fatal("P-026", new Object[]{value});
 855                 }
 856             }
 857         }
 858         if (value != null && !value.equals(versionNum)) {
 859             error("P-027", new Object[]{versionNum, value});
 860         }
 861     }
 862 
 863     // common code used by most markup declarations
 864     // ... S (Q)Name ...
 865     private String getMarkupDeclname(String roleId, boolean qname)
 866             throws IOException, SAXException {
 867 
 868         String name;
 869 
 870         whitespace(roleId);
 871         name = maybeGetName();
 872         if (name == null) {
 873             fatal("P-005", new Object[]{messages.getMessage(locale, roleId)});
 874         }
 875         return name;
 876     }
 877 
 878     private boolean maybeMarkupDecl()
 879             throws IOException, SAXException {
 880 
 881         // [29] markupdecl ::= elementdecl | Attlistdecl
 882         //           | EntityDecl | NotationDecl | PI | Comment
 883         return maybeElementDecl()
 884                 || maybeAttlistDecl()
 885                 || maybeEntityDecl()
 886                 || maybeNotationDecl()
 887                 || maybePI(false)
 888                 || maybeComment(false);
 889     }

 890     private static final String XmlLang = "xml:lang";
 891 
 892     private boolean isXmlLang(String value) {
 893 
 894         // [33] LanguageId ::= Langcode ('-' Subcode)*
 895         // [34] Langcode ::= ISO639Code | IanaCode | UserCode
 896         // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
 897         // [36] IanaCode ::= [iI] '-' SubCode
 898         // [37] UserCode ::= [xX] '-' SubCode
 899         // [38] SubCode ::= [a-zA-Z]+
 900 
 901         // the ISO and IANA codes (and subcodes) are registered,
 902         // but that's neither a WF nor a validity constraint.
 903 
 904         int nextSuffix;
 905         char c;
 906 
 907         if (value.length() < 2) {
 908             return false;
 909         }
 910         c = value.charAt(1);
 911         if (c == '-') {        // IANA, or user, code
 912             c = value.charAt(0);
 913             if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X')) {
 914                 return false;
 915             }
 916             nextSuffix = 1;
 917         } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
 918             // 2 letter ISO code, or error
 919             c = value.charAt(0);
 920             if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
 921                 return false;
 922             }
 923             nextSuffix = 2;
 924         } else {
 925             return false;
 926         }
 927 
 928         // here "suffix" ::= '-' [a-zA-Z]+ suffix*
 929         while (nextSuffix < value.length()) {
 930             c = value.charAt(nextSuffix);
 931             if (c != '-') {
 932                 break;
 933             }
 934             while (++nextSuffix < value.length()) {
 935                 c = value.charAt(nextSuffix);
 936                 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
 937                     break;
 938                 }
 939             }
 940         }
 941         return value.length() == nextSuffix && c != '-';
 942     }
 943 

 944     //
 945     // CHAPTER 3:  Logical Structures
 946     //

 947     /**
 948      * To validate, subclassers should at this time make sure that values are of
 949      * the declared types:<UL> <LI> ID and IDREF(S) values are Names <LI>
 950      * NMTOKEN(S) are Nmtokens <LI> ENUMERATION values match one of the tokens
 951      * <LI> NOTATION values match a notation name <LI> ENTITY(IES) values match
 952      * an unparsed external entity </UL>
 953      * <p>
 954      * <P> Separately, make sure IDREF values match some ID provided in the
 955      * document (in the afterRoot method).



 956      */
 957     /*    void validateAttributeSyntax (Attribute attr, String value)
 958      throws DTDParseException {
 959      // ID, IDREF(S) ... values are Names
 960      if (Attribute.ID == attr.type()) {
 961      if (!XmlNames.isName (value))
 962      error ("V-025", new Object [] { value });
 963 
 964      Boolean             b = (Boolean) ids.getNonInterned (value);
 965      if (b == null || b.equals (Boolean.FALSE))
 966      ids.put (value.intern (), Boolean.TRUE);
 967      else
 968      error ("V-026", new Object [] { value });
 969 
 970      } else if (Attribute.IDREF == attr.type()) {
 971      if (!XmlNames.isName (value))
 972      error ("V-027", new Object [] { value });
 973 
 974      Boolean             b = (Boolean) ids.getNonInterned (value);
 975      if (b == null)
 976      ids.put (value.intern (), Boolean.FALSE);
 977 


1040      if (!isUnparsedEntity (value))
1041      error ("V-031", new Object [] { value });
1042 
1043      } else if (Attribute.ENTITIES == attr.type()) {
1044      StringTokenizer     tokenizer = new StringTokenizer (value);
1045      boolean             sawValue = false;
1046 
1047      while (tokenizer.hasMoreTokens ()) {
1048      value = tokenizer.nextToken ();
1049      // see note above re standalone
1050      if (!isUnparsedEntity (value))
1051      error ("V-031", new Object [] { value });
1052      sawValue = true;
1053      }
1054      if (!sawValue)
1055      error ("V-040", null);
1056 
1057      } else if (Attribute.CDATA != attr.type())
1058      throw new InternalError (attr.type());
1059      }
1060      */
1061     /*
1062      private boolean isUnparsedEntity (String name)
1063      {
1064      Object e = entities.getNonInterned (name);
1065      if (e == null || !(e instanceof ExternalEntity))
1066      return false;
1067      return ((ExternalEntity)e).notation != null;
1068      }
1069      */
1070     private boolean maybeElementDecl()
1071             throws IOException, SAXException {
              // Parses one <!ELEMENT ...> declaration if the input is positioned
              // at one and returns true; returns false when peekDeclaration()
              // finds no "!ELEMENT".  The content model is reported to dtdHandler
              // through startContentModel()/endContentModel().
1072 
1073         // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1074         // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1075         InputEntity start = peekDeclaration("!ELEMENT");
1076 
1077         if (start == null) {
1078             return false;
1079         }
1080 
1081         // n.b. for content models where inter-element whitespace is
1082         // ignorable, we mark that fact here.
1083         String name = getMarkupDeclname("F-015", true);
1084 //    Element        element = (Element) elements.get (name);
1085 //    boolean        declEffective = false;
1086 
1087         /*
1088          if (element != null) {
1089          if (element.contentModel() != null) {
1090          error ("V-012", new Object [] { name });
1091          } // else <!ATTLIST name ...> came first
1092          } else {
1093          element = new Element(name);
1094          elements.put (element.name(), element);
1095          declEffective = true;
1096          }
1097          */
              // a second declaration of the same element name is reported as
              // "V-012" but parsing of the declaration still continues
1098         if (declaredElements.contains(name)) {
1099             error("V-012", new Object[]{name});
1100         } else {
1101             declaredElements.add(name);
1102 //        declEffective = true;
1103         }
1104 
1105         short modelType;
1106         whitespace("F-000");
1107         if (peek(strEMPTY)) {
1108 ///        // leave element.contentModel as null for this case.
1109             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
1110         } else if (peek(strANY)) {
1111 ///        element.setContentModel(new StringModel(StringModelType.ANY));
1112             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
1113         } else {
                  // Mixed or children: getMixedOrChildren() returns the model
                  // type; for the mixed case it also issues startContentModel()
1114             modelType = getMixedOrChildren(name);
1115         }
1116 
1117         dtdHandler.endContentModel(name, modelType);
1118 
1119         maybeWhitespace();
1120         char c = getc();
1121         if (c != '>') {
1122             fatal("P-036", new Object[]{name, Character.valueOf(c)});
1123         }
              // the declaration ended in a different input entity than the one
              // it started in
1124         if (start != in) {
1125             error("V-013", null);
1126         }
1127 
1128 ///        dtdHandler.elementDecl(element);
1129 
1130         return true;
1131     }
1132 
1133     // We're leaving the content model as a regular expression;
1134     // it's an efficient natural way to express such things, and
1135     // libraries often interpret them.  No whitespace in the
1136     // model we store, though!

1137     /**
1138      * returns content model type.
1139      */
1140     private short getMixedOrChildren(String elementName/*Element element*/)
1141             throws IOException, SAXException {
1142 
1143         InputEntity start;
1144 
1145         // [47] children ::= (choice|seq) ('?'|'*'|'+')?
1146         strTmp = new StringBuffer();
1147 
1148         nextChar('(', "F-028", elementName);
1149         start = in;
1150         maybeWhitespace();
1151         strTmp.append('(');
1152 
1153         short modelType;
1154         if (peek("#PCDATA")) {
1155             strTmp.append("#PCDATA");
1156             dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);


1183             String tag;
1184 
1185             tag = maybeGetName();
1186             if (tag != null) {
1187                 strTmp.append(tag);
1188 //                temp = new ElementModel(tag);
1189 //                getFrequency((RepeatableContent)temp);
1190 ///->
1191                 dtdHandler.childElement(tag, getFrequency());
1192 ///<-
1193             } else if (peek("(")) {
1194                 InputEntity next = in;
1195                 strTmp.append('(');
1196                 maybeWhitespace();
1197 //                temp = getcps(element, next);
1198 //                getFrequency(temp);
1199 ///->
1200                 getcps(elementName, next);
1201 ///                getFrequency();        <- this looks like a bug
1202 ///<-
1203             } else {
1204                 fatal((type == 0) ? "P-039"
1205                         : ((type == ',') ? "P-037" : "P-038"),
1206                         new Object[]{Character.valueOf(getc())});
1207             }
1208 
1209             maybeWhitespace();
1210             if (decided) {
1211                 char c = getc();
1212 
1213 //                if (current != null) {
1214 //                    current.addChild(temp);
1215 //                }
1216                 if (c == type) {
1217                     strTmp.append(type);
1218                     maybeWhitespace();
1219                     reportConnector(type);
1220                     continue;
1221                 } else if (c == '\u0029') {    // rparen
1222                     ungetc();
1223                     continue;
1224                 } else {
1225                     fatal((type == 0) ? "P-041" : "P-040",
1226                             new Object[]{
1227                                 Character.valueOf(c),
1228                                 Character.valueOf(type)
1229                             });
1230                 }
1231             } else {
1232                 type = getc();
1233                 switch (type) {
1234                     case '|':
1235                     case ',':
1236                         reportConnector(type);
1237                         break;
1238                     default:
1239 //                        retval = temp;
1240                         ungetc();
1241                         continue;
1242                 }
1243 //                retval = (ContentModel)current;
1244                 decided = true;
1245 //                current.addChild(temp);
1246                 strTmp.append(type);
1247             }
1248             maybeWhitespace();
1249         } while (!peek(")"));
1250 
1251         if (in != start) {
1252             error("V-014", new Object[]{elementName});
1253         }
1254         strTmp.append(')');
1255 
1256         dtdHandler.endModelGroup(getFrequency());
1257 //        return retval;
1258     }
1259 
1260     private void reportConnector(char type) throws SAXException {
1261         switch (type) {
1262             case '|':
1263                 dtdHandler.connector(DTDEventListener.CHOICE);    ///<-
1264                 return;
1265             case ',':
1266                 dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
1267                 return;
1268             default:
1269                 throw new Error();    //assertion failed.
1270         }
1271     }
1272 
1273     private short getFrequency()


1285             //        original.setRepeat(Repeat.ONE_OR_MORE);
1286         } else if (c == '*') {
1287             strTmp.append(c);
1288             return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
1289             //        original.setRepeat(Repeat.ZERO_OR_MORE);
1290         } else {
1291             ungetc();
1292             return DTDEventListener.OCCURENCE_ONCE;
1293         }
1294     }
1295 
1296     // '(' S? '#PCDATA' already consumed
1297     // matching ')' must be in "start" entity if validating
1298     private void getMixed(String elementName, /*Element element,*/ InputEntity start)
1299             throws IOException, SAXException {
1300 
1301         // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
1302         //        | '(' S? '#PCDATA'                   S? ')'
1303         maybeWhitespace();
1304         if (peek("\u0029*") || peek("\u0029")) {
1305             if (in != start) {
1306                 error("V-014", new Object[]{elementName});
1307             }
1308             strTmp.append(')');
1309 //            element.setContentModel(new StringModel(StringModelType.PCDATA));
1310             return;
1311         }
1312 
1313         ArrayList l = new ArrayList();
1314 //    l.add(new StringModel(StringModelType.PCDATA));
1315 
1316 
1317         while (peek("|")) {
1318             String name;
1319 
1320             strTmp.append('|');
1321             maybeWhitespace();
1322 
1323             doLexicalPE = true;
1324             name = maybeGetName();
1325             if (name == null) {
1326                 fatal("P-042", new Object[]{elementName, Integer.toHexString(getc())});
1327             }
1328             if (l.contains(name)) {
1329                 error("V-015", new Object[]{name});
1330             } else {
1331                 l.add(name);
1332                 dtdHandler.mixedElement(name);
1333             }
1334             strTmp.append(name);
1335             maybeWhitespace();
1336         }
1337 
1338         if (!peek("\u0029*")) // right paren
1339         {
1340             fatal("P-043", new Object[]{elementName, Character.valueOf(getc())});
1341         }
1342         if (in != start) {
1343             error("V-014", new Object[]{elementName});
1344         }
1345         strTmp.append(')');
1346 //        ChoiceModel cm = new ChoiceModel((Collection)l);
1347 //    cm.setRepeat(Repeat.ZERO_OR_MORE);
1348 //       element.setContentModel(cm);
1349     }
1350 
1351     private boolean maybeAttlistDecl()
1352             throws IOException, SAXException {
1353 
1354         // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1355         InputEntity start = peekDeclaration("!ATTLIST");
1356 
1357         if (start == null) {
1358             return false;
1359         }
1360 
1361         String elementName = getMarkupDeclname("F-016", true);
1362 //    Element    element = (Element) elements.get (name);
1363 
1364 //    if (element == null) {
1365 //        // not yet declared -- no problem.
1366 //        element = new Element(name);
1367 //        elements.put(name, element);
1368 //    }
1369 
1370         while (!peek(">")) {
1371 
1372             // [53] AttDef ::= S Name S AttType S DefaultDecl
1373             // [54] AttType ::= StringType | TokenizedType | EnumeratedType
1374 
1375             // look for global attribute definitions, don't expand for now...
1376             maybeWhitespace();
1377             char c = getc();
1378             if (c == '%') {
1379                 String entityName = maybeGetName();
1380                 if (entityName != null) {
1381                     nextChar(';', "F-021", entityName);
1382                     whitespace("F-021");
1383                     continue;
1384                 } else {
1385                     fatal("P-011");
1386                 }
1387             }
1388 
1389             ungetc();
1390             // look for attribute name otherwise
1391             String attName = maybeGetName();
1392             if (attName == null) {
1393                 fatal("P-044", new Object[]{Character.valueOf(getc())});
1394             }
1395             whitespace("F-001");
1396 
1397 ///        Attribute    a = new Attribute (name);
1398 
1399             String typeName;
1400             Vector values = null;    // notation/enumeration values
1401 
1402             // Note:  use the type constants from Attribute
1403             // so that "==" may be used (faster)
1404 
1405             // [55] StringType ::= 'CDATA'
1406             if (peek(TYPE_CDATA)) ///            a.setType(Attribute.CDATA);
1407             {
1408                 typeName = TYPE_CDATA;
1409             } // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'

1410             //        | 'ENTITY' | 'ENTITIES'
1411             //        | 'NMTOKEN' | 'NMTOKENS'
1412             // n.b. if "IDREFS" is there, both "ID" and "IDREF"
1413             // match peekahead ... so this order matters!
1414             else if (peek(TYPE_IDREFS)) {
1415                 typeName = TYPE_IDREFS;
1416             } else if (peek(TYPE_IDREF)) {
1417                 typeName = TYPE_IDREF;
1418             } else if (peek(TYPE_ID)) {
1419                 typeName = TYPE_ID;
1420 // TODO: should implement this error check?
1421 ///        if (element.id() != null) {
1422 ///                    error ("V-016", new Object [] { element.id() });
1423 ///        } else
1424 ///            element.setId(name);
1425             } else if (peek(TYPE_ENTITY)) {
1426                 typeName = TYPE_ENTITY;
1427             } else if (peek(TYPE_ENTITIES)) {
1428                 typeName = TYPE_ENTITIES;
1429             } else if (peek(TYPE_NMTOKENS)) {
1430                 typeName = TYPE_NMTOKENS;
1431             } else if (peek(TYPE_NMTOKEN)) {
1432                 typeName = TYPE_NMTOKEN;
1433             } // [57] EnumeratedType ::= NotationType | Enumeration

1434             // [58] NotationType ::= 'NOTATION' S '(' S? Name
1435             //        (S? '|' S? Name)* S? ')'
1436             else if (peek(TYPE_NOTATION)) {
1437                 typeName = TYPE_NOTATION;
1438                 whitespace("F-002");
1439                 nextChar('(', "F-029", null);
1440                 maybeWhitespace();
1441 
1442                 values = new Vector();
1443                 do {
1444                     String name;
1445                     if ((name = maybeGetName()) == null) {
1446                         fatal("P-068");
1447                     }
1448                     // permit deferred declarations
1449                     if (notations.get(name) == null) {
1450                         notations.put(name, name);
1451                     }
1452                     values.addElement(name);
1453                     maybeWhitespace();
1454                     if (peek("|")) {
1455                         maybeWhitespace();
1456                     }
1457                 } while (!peek(")"));
1458 ///            a.setValues(new String [v.size ()]);
1459 ///            for (int i = 0; i < v.size (); i++)
1460 ///                a.setValue(i, (String)v.elementAt(i));
1461 
1462                 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
1463             } else if (peek("(")) {
1464 ///            a.setType(Attribute.ENUMERATION);
1465                 typeName = TYPE_ENUMERATION;
1466 
1467                 maybeWhitespace();
1468 
1469 ///            Vector v = new Vector ();
1470                 values = new Vector();
1471                 do {
1472                     String name = getNmtoken();
1473 ///                v.addElement (name);
1474                     values.addElement(name);
1475                     maybeWhitespace();
1476                     if (peek("|")) {
1477                         maybeWhitespace();
1478                     }
1479                 } while (!peek(")"));
1480 ///            a.setValues(new String [v.size ()]);
1481 ///            for (int i = 0; i < v.size (); i++)
1482 ///                a.setValue(i, (String)v.elementAt(i));
1483             } else {
1484                 fatal("P-045",
1485                         new Object[]{attName, Character.valueOf(getc())});
1486                 typeName = null;
1487             }
1488 
1489             short attributeUse;
1490             String defaultValue = null;
1491 
1492             // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1493             //        | (('#FIXED' S)? AttValue)
1494             whitespace("F-003");
1495             if (peek("#REQUIRED")) {
1496                 attributeUse = DTDEventListener.USE_REQUIRED;
1497             } ///            a.setIsRequired(true);
1498             else if (peek("#FIXED")) {
1499 ///            if (a.type() == Attribute.ID)
1500                 if (typeName == TYPE_ID) {
1501                     error("V-017", new Object[]{attName});
1502                 }
1503 ///            a.setIsFixed(true);
1504                 attributeUse = DTDEventListener.USE_FIXED;
1505                 whitespace("F-004");
1506                 parseLiteral(false);
1507 ///            if (a.type() != Attribute.CDATA)
1508 ///                a.setDefaultValue(normalize(false));
1509 ///            else
1510 ///                a.setDefaultValue(strTmp.toString());
1511 
1512                 if (typeName == TYPE_CDATA) {
1513                     defaultValue = normalize(false);
1514                 } else {
1515                     defaultValue = strTmp.toString();
1516                 }
1517 
1518 // TODO: implement this check
1519 ///            if (a.type() != Attribute.CDATA)
1520 ///                validateAttributeSyntax (a, a.defaultValue());
1521             } else if (!peek("#IMPLIED")) {
1522                 attributeUse = DTDEventListener.USE_IMPLIED;
1523 
1524 ///            if (a.type() == Attribute.ID)
1525                 if (typeName == TYPE_ID) {
1526                     error("V-018", new Object[]{attName});
1527                 }
1528                 parseLiteral(false);
1529 ///            if (a.type() != Attribute.CDATA)
1530 ///                a.setDefaultValue(normalize(false));
1531 ///            else
1532 ///                a.setDefaultValue(strTmp.toString());
1533                 if (typeName == TYPE_CDATA) {
1534                     defaultValue = normalize(false);
1535                 } else {
1536                     defaultValue = strTmp.toString();
1537                 }
1538 
1539 // TODO: implement this check
1540 ///            if (a.type() != Attribute.CDATA)
1541 ///                validateAttributeSyntax (a, a.defaultValue());
1542             } else {
1543                 // TODO: this looks like an fatal error.
1544                 attributeUse = DTDEventListener.USE_NORMAL;
1545             }
1546 
1547             if (XmlLang.equals(attName)
1548                     && defaultValue/* a.defaultValue()*/ != null
1549                     && !isXmlLang(defaultValue/*a.defaultValue()*/)) {
1550                 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});
1551             }
1552 
1553 // TODO: isn't it an error to specify the same attribute twice?
1554 ///        if (!element.attributes().contains(a)) {
1555 ///            element.addAttribute(a);
1556 ///            dtdHandler.attributeDecl(a);
1557 ///        }
1558 
1559             String[] v = (values != null) ? (String[]) values.toArray(new String[values.size()]) : null;
1560             dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
1561             maybeWhitespace();
1562         }
1563         if (start != in) {
1564             error("V-013", null);
1565         }
1566         return true;
1567     }
1568 
1569     // used when parsing literal attribute values,
1570     // or public identifiers.
1571     //
1572     // input in strTmp
1573     private String normalize(boolean invalidIfNeeded) {
1574 
1575         // this can allocate an extra string...
1576 
1577         String s = strTmp.toString();
1578         String s2 = s.trim();
1579         boolean didStrip = false;
1580 
1581         if (s != s2) {
1582             s = s2;

1583             didStrip = true;
1584         }
1585         strTmp = new StringBuffer();
1586         for (int i = 0; i < s.length(); i++) {
1587             char c = s.charAt(i);
1588             if (!XmlChars.isSpace(c)) {
1589                 strTmp.append(c);
1590                 continue;
1591             }
1592             strTmp.append(' ');
1593             while (++i < s.length() && XmlChars.isSpace(s.charAt(i))) {
1594                 didStrip = true;
1595             }
1596             i--;
1597         }
1598         if (didStrip) {
1599             return strTmp.toString();
1600         } else {
1601             return s;
1602         }
1603     }
1604 
1605     private boolean maybeConditionalSect()
1606             throws IOException, SAXException {
1607 
1608         // [61] conditionalSect ::= includeSect | ignoreSect
1609 
1610         if (!peek("<![")) {
1611             return false;
1612         }
1613 
1614         String keyword;
1615         InputEntity start = in;
1616 
1617         maybeWhitespace();
1618 
1619         if ((keyword = maybeGetName()) == null) {
1620             fatal("P-046");
1621         }
1622         maybeWhitespace();
1623         nextChar('[', "F-030", null);
1624 
1625         // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
1626         //                extSubsetDecl ']]>'
1627         if ("INCLUDE".equals(keyword)) {
1628             for (;;) {
1629                 while (in.isEOF() && in != start) {
1630                     in = in.pop();
1631                 }
1632                 if (in.isEOF()) {
1633                     error("V-020", null);
1634                 }
1635                 if (peek("]]>")) {
1636                     break;
1637                 }
1638 
1639                 doLexicalPE = false;
1640                 if (maybeWhitespace()) {
1641                     continue;
1642                 }
1643                 if (maybePEReference()) {
1644                     continue;
1645                 }
1646                 doLexicalPE = true;
1647                 if (maybeMarkupDecl() || maybeConditionalSect()) {
1648                     continue;
1649                 }
1650 
1651                 fatal("P-047");
1652             }
1653 
1654             // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
1655             //            ignoreSectcontents ']]>'
1656             // [64] ignoreSectcontents ::= Ignore ('<!['
1657             //            ignoreSectcontents ']]>' Ignore)*
1658             // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
1659         } else if ("IGNORE".equals(keyword)) {
1660             int nestlevel = 1;
1661             // ignoreSectcontents
1662             doLexicalPE = false;
1663             while (nestlevel > 0) {
1664                 char c = getc();    // will pop input entities
1665                 if (c == '<') {
1666                     if (peek("![")) {
1667                         nestlevel++;
1668                     }
1669                 } else if (c == ']') {
1670                     if (peek("]>")) {
1671                         nestlevel--;
1672                     }
1673                 } else {
1674                     continue;
1675                 }
1676             }
1677         } else {
1678             fatal("P-048", new Object[]{keyword});
1679         }
1680         return true;
1681     }
1682 

1683     //
1684     // CHAPTER 4:  Physical Structures
1685     //

1686     // parse decimal or hex numeric character reference
1687     private int parseCharNumber()
1688             throws IOException, SAXException {
1689 
1690         char c;
1691         int retval = 0;
1692 
1693         // n.b. we ignore overflow ...
1694         if (getc() != 'x') {
1695             ungetc();
1696             for (;;) {
1697                 c = getc();
1698                 if (c >= '0' && c <= '9') {
1699                     retval *= 10;
1700                     retval += (c - '0');
1701                     continue;
1702                 }
1703                 if (c == ';') {
1704                     return retval;
1705                 }
1706                 fatal("P-049");
1707             }
1708         } else {
1709             for (;;) {
1710                 c = getc();
1711                 if (c >= '0' && c <= '9') {
1712                     retval <<= 4;
1713                     retval += (c - '0');
1714                     continue;
1715                 }
1716                 if (c >= 'a' && c <= 'f') {
1717                     retval <<= 4;
1718                     retval += 10 + (c - 'a');
1719                     continue;
1720                 }
1721                 if (c >= 'A' && c <= 'F') {
1722                     retval <<= 4;
1723                     retval += 10 + (c - 'A');
1724                     continue;
1725                 }
1726                 if (c == ';') {
1727                     return retval;
1728                 }
1729                 fatal("P-050");
1730             }
1731         }
1732     }
1733 
1734     // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
1735     // though still subject to the 'Char' construct in XML
1736     private int surrogatesToCharTmp(int ucs4)
1737             throws SAXException {
1738 
1739         if (ucs4 <= 0xffff) {
1740             if (XmlChars.isChar(ucs4)) {
1741                 charTmp[0] = (char) ucs4;
1742                 return 1;
1743             }
1744         } else if (ucs4 <= 0x0010ffff) {
1745             // we represent these as UNICODE surrogate pairs
1746             ucs4 -= 0x10000;
1747             charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
1748             charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
1749             return 2;
1750         }
1751         fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
1752         // NOTREACHED
1753         return -1;
1754     }
1755 
1756     private boolean maybePEReference()
1757             throws IOException, SAXException {
1758 
1759         // This is the SYNTACTIC version of this construct.
1760         // When processing external entities, there is also
1761         // a LEXICAL version; see getc() and doLexicalPE.
1762 
1763         // [69] PEReference ::= '%' Name ';'
1764         if (!in.peekc('%')) {
1765             return false;
1766         }
1767 
1768         String name = maybeGetName();
1769         Object entity;
1770 
1771         if (name == null) {
1772             fatal("P-011");
1773         }
1774         nextChar(';', "F-021", name);
1775         entity = params.get(name);
1776 
1777         if (entity instanceof InternalEntity) {
1778             InternalEntity value = (InternalEntity) entity;
1779             pushReader(value.buf, name, false);
1780 
1781         } else if (entity instanceof ExternalEntity) {
1782             pushReader((ExternalEntity) entity);
1783             externalParameterEntity((ExternalEntity) entity);
1784 
1785         } else if (entity == null) {
1786             error("V-022", new Object[]{name});
1787         }
1788         return true;
1789     }
1790 
1791     private boolean maybeEntityDecl()
1792             throws IOException, SAXException {
1793 
1794         // [70] EntityDecl ::= GEDecl | PEDecl
1795         // [71] GEDecl ::= '<!ENTITY' S       Name S EntityDef S? '>'
1796         // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF     S? '>'
1797         // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1798         // [74] PEDef     ::= EntityValue |  ExternalID
1799         //
1800         InputEntity start = peekDeclaration("!ENTITY");
1801 
1802         if (start == null) {
1803             return false;
1804         }
1805 
1806         String entityName;
1807         SimpleHashtable defns;
1808         ExternalEntity externalId;
1809         boolean doStore;
1810 
1811         // PE expansion gets selectively turned off several places:
1812         // in ENTITY declarations (here), in comments, in PIs.
1813 
1814         // Here, we allow PE entities to be declared, and allows
1815         // literals to include PE refs without the added spaces
1816         // required with their expansion in markup decls.
1817 
1818         doLexicalPE = false;
1819         whitespace("F-005");
1820         if (in.peekc('%')) {
1821             whitespace("F-006");
1822             defns = params;
1823         } else {
1824             defns = entities;
1825         }
1826 
1827         ungetc();    // leave some whitespace
1828         doLexicalPE = true;
1829         entityName = getMarkupDeclname("F-017", false);
1830         whitespace("F-007");
1831         externalId = maybeExternalID();
1832 
1833         //
1834         // first definition sticks ... e.g. internal subset PEs are used
1835         // to override DTD defaults.  It's also an "error" to incorrectly
1836         // redefine builtin internal entities, but since reporting such
1837         // errors is optional we only give warnings ("just in case") for
1838         // non-parameter entities.
1839         //
1840         doStore = (defns.get(entityName) == null);
1841         if (!doStore && defns == entities) {
1842             warning("P-054", new Object[]{entityName});
1843         }
1844 
1845         // internal entities
1846         if (externalId == null) {
1847             char value[];
1848             InternalEntity entity;
1849 
1850             doLexicalPE = false;        // "ab%bar;cd" -maybe-> "abcd"
1851             parseLiteral(true);
1852             doLexicalPE = true;
1853             if (doStore) {
1854                 value = new char[strTmp.length()];
1855                 if (value.length != 0) {
1856                     strTmp.getChars(0, value.length, value, 0);
1857                 }
1858                 entity = new InternalEntity(entityName, value);
1859                 entity.isPE = (defns == params);

1860                 defns.put(entityName, entity);
1861                 if (defns == entities) {
1862                     dtdHandler.internalGeneralEntityDecl(entityName,
1863                             new String(value));
1864                 }
1865             }
1866 
1867             // external entities (including unparsed)
1868         } else {
1869             // [76] NDataDecl ::= S 'NDATA' S Name
1870             if (defns == entities && maybeWhitespace()
1871                     && peek("NDATA")) {
1872                 externalId.notation = getMarkupDeclname("F-018", false);
1873 
1874                 // flag undeclared notation for checking after
1875                 // the DTD is fully processed
1876                 if (notations.get(externalId.notation) == null) {
1877                     notations.put(externalId.notation, Boolean.TRUE);
1878                 }
1879             }
1880             externalId.name = entityName;
1881             externalId.isPE = (defns == params);

1882             if (doStore) {
1883                 defns.put(entityName, externalId);
1884                 if (externalId.notation != null) {
1885                     dtdHandler.unparsedEntityDecl(entityName,
1886                             externalId.publicId, externalId.systemId,
1887                             externalId.notation);
1888                 } else if (defns == entities) {
1889                     dtdHandler.externalGeneralEntityDecl(entityName,
1890                             externalId.publicId, externalId.systemId);
1891                 }
1892             }
1893         }
1894         maybeWhitespace();
1895         nextChar('>', "F-031", entityName);
1896         if (start != in) {
1897             error("V-013", null);
1898         }
1899         return true;
1900     }
1901 
1902     private ExternalEntity maybeExternalID()
1903             throws IOException, SAXException {
1904 
1905         // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1906         //        | 'PUBLIC' S' PubidLiteral S Systemliteral
1907         String temp = null;
1908         ExternalEntity retval;
1909 
1910         if (peek("PUBLIC")) {
1911             whitespace("F-009");
1912             temp = parsePublicId();
1913         } else if (!peek("SYSTEM")) {
1914             return null;
1915         }
1916 
1917         retval = new ExternalEntity(in);
1918         retval.publicId = temp;
1919         whitespace("F-008");
1920         retval.systemId = parseSystemId();
1921         return retval;
1922     }
1923 
1924     private String parseSystemId()
1925             throws IOException, SAXException {
1926 
1927         String uri = getQuotedString("F-034", null);
1928         int temp = uri.indexOf(':');
1929 
1930         // resolve relative URIs ... must do it here since
1931         // it's relative to the source file holding the URI!
1932 
1933         // "new java.net.URL (URL, string)" conforms to RFC 1630,
1934         // but we can't use that except when the URI is a URL.
1935         // The entity resolver is allowed to handle URIs that are
1936         // not URLs, so we pass URIs through with scheme intact
1937         if (temp == -1 || uri.indexOf('/') < temp) {
1938             String baseURI;
1939 
1940             baseURI = in.getSystemId();
1941             if (baseURI == null) {
1942                 fatal("P-055", new Object[]{uri});
1943             }
1944             if (uri.length() == 0) {
1945                 uri = ".";
1946             }
1947             baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
1948             if (uri.charAt(0) != '/') {
1949                 uri = baseURI + uri;
1950             } else {
1951                 // XXX slashes at the beginning of a relative URI are
1952                 // a special case we don't handle.
1953                 throw new InternalError();
1954             }
1955 
1956             // letting other code map any "/xxx/../" or "/./" to "/",
1957             // since all URIs must handle it the same.
1958         }
1959         // check for fragment ID in URI
1960         if (uri.indexOf('#') != -1) {
1961             error("P-056", new Object[]{uri});
1962         }
1963         return uri;
1964     }
1965 
1966     private void maybeTextDecl()
1967             throws IOException, SAXException {
1968 
1969         // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1970         if (peek("<?xml")) {
1971             readVersion(false, "1.0");
1972             readEncoding(true);
1973             maybeWhitespace();
1974             if (!peek("?>")) {
1975                 fatal("P-057");
1976             }
1977         }
1978     }
1979 
1980     private void externalParameterEntity(ExternalEntity next)
1981             throws IOException, SAXException {
1982 
1983         //
1984         // Reap the intended benefits of standalone declarations:
1985         // don't deal with external parameter entities, except to
1986         // validate the standalone declaration.
1987         //
1988 
1989         // n.b. "in external parameter entities" (and external
1990         // DTD subset, same grammar) parameter references can
1991         // occur "within" markup declarations ... expansions can
1992         // cross syntax rules.  Flagged here; affects getc().
1993 
1994         // [79] ExtPE ::= TextDecl? extSubsetDecl
1995         // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
1996         //        | PEReference | S )*
1997         InputEntity pe;
1998 
1999         // XXX if this returns false ...
2000 
2001         pe = in;
2002         maybeTextDecl();
2003         while (!pe.isEOF()) {
2004             // pop internal PEs (and whitespace before/after)
2005             if (in.isEOF()) {
2006                 in = in.pop();
2007                 continue;
2008             }
2009             doLexicalPE = false;
2010             if (maybeWhitespace()) {
2011                 continue;
2012             }
2013             if (maybePEReference()) {
2014                 continue;
2015             }
2016             doLexicalPE = true;
2017             if (maybeMarkupDecl() || maybeConditionalSect()) {
2018                 continue;
2019             }
2020             break;
2021         }
2022         // if (in != pe) throw new InternalError("who popped my PE?");
2023         if (!pe.isEOF()) {
2024             fatal("P-059", new Object[]{in.getName()});
2025         }
2026     }
2027 
2028     private void readEncoding(boolean must)
2029             throws IOException, SAXException {
2030 
2031         // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2032         String name = maybeReadAttribute("encoding", must);
2033 
2034         if (name == null) {
2035             return;
2036         }
2037         for (int i = 0; i < name.length(); i++) {
2038             char c = name.charAt(i);
2039             if ((c >= 'A' && c <= 'Z')
2040                     || (c >= 'a' && c <= 'z')) {
2041                 continue;
2042             }
2043             if (i != 0
2044                     && ((c >= '0' && c <= '9')
2045                     || c == '-'
2046                     || c == '_'
2047                     || c == '.')) {

2048                 continue;
2049             }
2050             fatal("P-060", new Object[]{Character.valueOf(c)});
2051         }
2052 
2053         //
2054         // This should be the encoding in use, and it's even an error for
2055         // it to be anything else (in certain cases that are impractical to
2056         // to test, and may even be insufficient).  So, we do the best we
2057         // can, and warn if things look suspicious.  Note that Java doesn't
2058         // uniformly expose the encodings, and that the names it uses
2059         // internally are nonstandard.  Also, that the XML spec allows
2060         // such "errors" not to be reported at all.
2061         //
2062         String currentEncoding = in.getEncoding();
2063 
2064         if (currentEncoding != null
2065                 && !name.equalsIgnoreCase(currentEncoding)) {
2066             warning("P-061", new Object[]{name, currentEncoding});
2067         }
2068     }
2069 
2070     private boolean maybeNotationDecl()
2071             throws IOException, SAXException {
2072 
2073         // [82] NotationDecl ::= '<!NOTATION' S Name S
2074         //        (ExternalID | PublicID) S? '>'
2075         // [83] PublicID ::= 'PUBLIC' S PubidLiteral
2076         InputEntity start = peekDeclaration("!NOTATION");
2077 
2078         if (start == null) {
2079             return false;
2080         }
2081 
2082         String name = getMarkupDeclname("F-019", false);
2083         ExternalEntity entity = new ExternalEntity(in);
2084 
2085         whitespace("F-011");
2086         if (peek("PUBLIC")) {
2087             whitespace("F-009");
2088             entity.publicId = parsePublicId();
2089             if (maybeWhitespace()) {
2090                 if (!peek(">")) {
2091                     entity.systemId = parseSystemId();
2092                 } else {
2093                     ungetc();
2094                 }
2095             }
2096         } else if (peek("SYSTEM")) {
2097             whitespace("F-008");
2098             entity.systemId = parseSystemId();
2099         } else {
2100             fatal("P-062");
2101         }
2102         maybeWhitespace();
2103         nextChar('>', "F-032", name);
2104         if (start != in) {
2105             error("V-013", null);
2106         }
2107         if (entity.systemId != null && entity.systemId.indexOf('#') != -1) {
2108             error("P-056", new Object[]{entity.systemId});
2109         }
2110 
2111         Object value = notations.get(name);
2112         if (value != null && value instanceof ExternalEntity) {
2113             warning("P-063", new Object[]{name});
2114         } else {

2115             notations.put(name, entity);
2116             dtdHandler.notationDecl(name, entity.publicId,
2117                     entity.systemId);
2118         }
2119         return true;
2120     }
2121 

2122     ////////////////////////////////////////////////////////////////
2123     //
2124     //    UTILITIES
2125     //
2126     ////////////////////////////////////////////////////////////////

2127     private char getc() throws IOException, SAXException {
2128 
2129         if (!doLexicalPE) {
2130             char c = in.getc();
2131             return c;
2132         }
2133 
2134         //
2135         // External parameter entities get funky processing of '%param;'
2136         // references.  It's not clearly defined in the XML spec; but it
2137         // boils down to having those refs be _lexical_ in most cases to
2138         // include partial syntax productions.  It also needs selective
2139         // enabling; "<!ENTITY % foo ...>" must work, for example, and
2140         // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
2141         // if it's expanded in a literal, else "ab  cd".  PEs also do
2142         // not expand within comments or PIs, and external PEs are only
2143         // allowed to have markup decls (and so aren't handled lexically).
2144         //
2145         // This PE handling should be merged into maybeWhitespace, where
2146         // it can be dealt with more consistently.
2147         //
2148         // Also, there are some validity constraints in this area.
2149         //
2150         char c;
2151 
2152         while (in.isEOF()) {
2153             if (in.isInternal() || (doLexicalPE && !in.isDocument())) {
2154                 in = in.pop();
2155             } else {
2156                 fatal("P-064", new Object[]{in.getName()});
2157             }
2158         }
2159         if ((c = in.getc()) == '%' && doLexicalPE) {
2160             // PE ref ::= '%' name ';'
2161             String name = maybeGetName();
2162             Object entity;
2163 
2164             if (name == null) {
2165                 fatal("P-011");
2166             }
2167             nextChar(';', "F-021", name);
2168             entity = params.get(name);
2169 
2170             // push a magic "entity" before and after the
2171             // real one, so ungetc() behaves uniformly
2172             pushReader(" ".toCharArray(), null, false);
2173             if (entity instanceof InternalEntity) {
2174                 pushReader(((InternalEntity) entity).buf, name, false);
2175             } else if (entity instanceof ExternalEntity) // PEs can't be unparsed!

2176             // XXX if this returns false ...
2177             {
2178                 pushReader((ExternalEntity) entity);
2179             } else if (entity == null) // see note in maybePEReference re making this be nonfatal.
2180             {
2181                 fatal("V-022");
2182             } else {
2183                 throw new InternalError();
2184             }
2185             pushReader(" ".toCharArray(), null, false);
2186             return in.getc();
2187         }
2188         return c;
2189     }
2190 
2191     private void ungetc() {
2192 
2193         in.ungetc();
2194     }
2195 
2196     private boolean peek(String s)
2197             throws IOException, SAXException {
2198 
2199         return in.peek(s, null);
2200     }
2201 
2202     // Return the entity starting the specified declaration
2203     // (for validating declaration nesting) else null.

2204     private InputEntity peekDeclaration(String s)
2205             throws IOException, SAXException {
2206 
2207         InputEntity start;
2208 
2209         if (!in.peekc('<')) {
2210             return null;
2211         }
2212         start = in;
2213         if (in.peek(s, null)) {
2214             return start;
2215         }
2216         in.ungetc();
2217         return null;
2218     }
2219 
2220     private void nextChar(char c, String location, String near)
2221             throws IOException, SAXException {
2222 
2223         while (in.isEOF() && !in.isDocument()) {
2224             in = in.pop();
2225         }
2226         if (!in.peekc(c)) {
2227             fatal("P-008", new Object[]{Character.valueOf(c),
2228                         messages.getMessage(locale, location),
2229                         (near == null ? "" : ('"' + near + '"'))});
2230         }
2231     }
2232 
2233     private void pushReader(char buf[], String name, boolean isGeneral)

2234             throws SAXException {
2235 
2236         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2237         r.init(buf, name, in, !isGeneral);
2238         in = r;
2239     }
2240 
2241     private boolean pushReader(ExternalEntity next)
2242             throws IOException, SAXException {
2243 
2244         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2245         InputSource s;
2246         try {
2247             s = next.getInputSource(resolver);
2248         } catch (IOException e) {
2249             String msg =
2250                     "unable to open the external entity from :" + next.systemId;
2251             if (next.publicId != null) {
2252                 msg += " (public id:" + next.publicId + ")";
2253             }
2254 
2255             SAXParseException spe = new SAXParseException(msg,
2256                     getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
2257             dtdHandler.fatalError(spe);
2258             throw e;
2259         }
2260 
2261         r.init(s, next.name, in, next.isPE);
2262         in = r;
2263         return true;
2264     }
2265 
2266     public String getPublicId() {
2267 
2268         return (in == null) ? null : in.getPublicId();
2269     }
2270 
2271     public String getSystemId() {
2272 
2273         return (in == null) ? null : in.getSystemId();
2274     }
2275 
2276     public int getLineNumber() {
2277 
2278         return (in == null) ? -1 : in.getLineNumber();
2279     }
2280 
2281     public int getColumnNumber() {
2282 
2283         return (in == null) ? -1 : in.getColumnNumber();
2284     }
2285 
2286     // error handling convenience routines
2287     private void warning(String messageId, Object parameters[])

2288             throws SAXException {
2289 
2290         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2291                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2292 
2293         dtdHandler.warning(e);
2294     }
2295 
2296     void error(String messageId, Object parameters[])
2297             throws SAXException {
2298 
2299         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2300                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2301 
2302         dtdHandler.error(e);
2303     }
2304 
2305     private void fatal(String messageId) throws SAXException {
2306 
2307         fatal(messageId, null);
2308     }
2309 
2310     private void fatal(String messageId, Object parameters[])
2311             throws SAXException {
2312 
2313         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2314                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2315 
2316         dtdHandler.fatalError(e);
2317 
2318         throw e;
2319     }
2320 
2321     //
2322     // Map char arrays to strings ... cuts down both on memory and
2323     // CPU usage for element/attribute/other names that are reused.
2324     //
2325     // Documents typically repeat names a lot, so we more or less
2326     // intern all the strings within the document; since some strings
2327     // are repeated in multiple documents (e.g. stylesheets) we go
2328     // a bit further, and intern globally.
2329     //
2330     static class NameCache {
2331         //
2332         // Unless we auto-grow this, the default size should be a
2333         // reasonable bit larger than needed for most XML files
2334         // we've yet seen (and be prime).  If it's too small, the
2335         // penalty is just excess cache collisions.
2336         //
2337 
2338         NameCacheEntry hashtable[] = new NameCacheEntry[541];
2339 
2340         //
2341         // Usually we just want to get the 'symbol' for these chars
2342         //
2343         String lookup(char value[], int len) {
2344 
2345             return lookupEntry(value, len).name;
2346         }
2347 
2348         //
2349         // Sometimes we need to scan the chars in the resulting
2350         // string, so there's an accessor which exposes them.
2351         // (Mostly for element end tags.)
2352         //
2353         NameCacheEntry lookupEntry(char value[], int len) {
2354 
2355             int index = 0;
2356             NameCacheEntry entry;
2357 
2358             // hashing to get index
2359             for (int i = 0; i < len; i++) {
2360                 index = index * 31 + value[i];
2361             }
2362             index &= 0x7fffffff;
2363             index %= hashtable.length;
2364 
2365             // return entry if one's there ...
2366             for (entry = hashtable[index];
2367                     entry != null;
2368                     entry = entry.next) {
2369                 if (entry.matches(value, len)) {
2370                     return entry;
2371                 }
2372             }
2373 
2374             // else create new one
2375             entry = new NameCacheEntry();
2376             entry.chars = new char[len];
2377             System.arraycopy(value, 0, entry.chars, 0, len);
2378             entry.name = new String(entry.chars);
2379             //
2380             // NOTE:  JDK 1.1 has a fixed size string intern table,
2381             // with non-GC'd entries.  It can panic here; that's a
2382             // JDK problem, use 1.2 or later with many identifiers.
2383             //
2384             entry.name = entry.name.intern();        // "global" intern
2385             entry.next = hashtable[index];
2386             hashtable[index] = entry;
2387             return entry;
2388         }
2389     }
2390 
2391     static class NameCacheEntry {
2392 
2393         String name;
2394         char chars[];
2395         NameCacheEntry next;
2396 
2397         boolean matches(char value[], int len) {
2398             if (chars == null || chars.length != len) {

2399                 return false;
2400             }
2401             for (int i = 0; i < len; i++) {
2402                 if (value[i] != chars[i]) {
2403                     return false;
2404                 }
2405             }
2406             return true;
2407         }
2408     }
2409 
2410     //
2411     // Message catalog for diagnostics.
2412     //
2413     static final Catalog messages = new Catalog();
2414 
2415     static final class Catalog extends MessageCatalog {
2416 
2417         Catalog() {
2418             super(DTDParser.class);
2419         }
2420     }

2421 }
< prev index next >