1 /* 2 * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.xml.internal.dtdparser; 27 28 import org.xml.sax.EntityResolver; 29 import org.xml.sax.InputSource; 30 import org.xml.sax.Locator; 31 import org.xml.sax.SAXException; 32 import org.xml.sax.SAXParseException; 33 34 import java.io.IOException; 35 import java.util.ArrayList; 36 import java.util.Enumeration; 37 import java.util.Hashtable; 38 import java.util.Locale; 39 import java.util.Set; 40 import java.util.Vector; 41 import java.util.logging.Level; 42 import java.util.logging.Logger; 43 44 /** 45 * This implements parsing of XML 1.0 DTDs. 46 * <p> 47 * This conforms to the portion of the XML 1.0 specification related to the 48 * external DTD subset. 
49 * <p> 50 * For multi-language applications (such as web servers using XML processing to 51 * create dynamic content), a method supports choosing a locale for parser 52 * diagnostics which is both understood by the message recipient and supported 53 * by the parser. 54 * <p> 55 * This parser produces a stream of parse events. It supports some features 56 * (exposing comments, CDATA sections, and entity references) which are not 57 * required to be reported by conformant XML processors. 58 * 59 * @author David Brownell 60 * @author Janet Koenig 61 * @author Kohsuke KAWAGUCHI 62 * @version $Id: DTDParser.java,v 1.2 2009-04-16 15:25:49 snajper Exp $ 63 */ 64 public class DTDParser { 65 66 public final static String TYPE_CDATA = "CDATA"; 67 public final static String TYPE_ID = "ID"; 68 public final static String TYPE_IDREF = "IDREF"; 69 public final static String TYPE_IDREFS = "IDREFS"; 70 public final static String TYPE_ENTITY = "ENTITY"; 71 public final static String TYPE_ENTITIES = "ENTITIES"; 72 public final static String TYPE_NMTOKEN = "NMTOKEN"; 73 public final static String TYPE_NMTOKENS = "NMTOKENS"; 74 public final static String TYPE_NOTATION = "NOTATION"; 75 public final static String TYPE_ENUMERATION = "ENUMERATION"; 76 // stack of input entities being merged 77 private InputEntity in; 78 // temporaries reused during parsing 79 private StringBuffer strTmp; 80 private char nameTmp[]; 81 private NameCache nameCache; 82 private char charTmp[] = new char[2]; 83 // temporary DTD parsing state 84 private boolean doLexicalPE; 85 // DTD state, used during parsing 86 // private SimpleHashtable elements = new SimpleHashtable (47); 87 protected final Set declaredElements = new java.util.HashSet(); 88 private SimpleHashtable params = new SimpleHashtable(7); 89 // exposed to package-private subclass 90 Hashtable notations = new Hashtable(7); 91 SimpleHashtable entities = new SimpleHashtable(17); 92 private SimpleHashtable ids = new SimpleHashtable(); 93 // listeners 
for DTD parsing events 94 private DTDEventListener dtdHandler; 95 private EntityResolver resolver; 96 private Locale locale; 97 // string constants -- use these copies so "==" works 98 // package private 99 static final String strANY = "ANY"; 100 static final String strEMPTY = "EMPTY"; 101 102 private static final Logger LOGGER = Logger.getLogger(DTDParser.class.getName()); 103 104 /** 105 * Used by applications to request locale for diagnostics. 106 * 107 * @param l The locale to use, or null to use system defaults (which may 108 * include only message IDs). 109 */ 110 public void setLocale(Locale l) throws SAXException { 111 112 if (l != null && !messages.isLocaleSupported(l.toString())) { 113 throw new SAXException(messages.getMessage(locale, 114 "P-078", new Object[]{l})); 115 } 116 locale = l; 117 } 118 119 /** 120 * Returns the diagnostic locale. 121 */ 122 public Locale getLocale() { 123 return locale; 124 } 125 126 /** 127 * Chooses a client locale to use for diagnostics, using the first language 128 * specified in the list that is supported by this parser. That locale is 129 * then set using <a href="#setLocale(java.util.Locale)"> setLocale()</a>. 130 * Such a list could be provided by a variety of user preference mechanisms, 131 * including the HTTP <em>Accept-Language</em> header field. 132 * 133 * @param languages Array of language specifiers, ordered with the most 134 * preferable one at the front. For example, "en-ca" then "fr-ca", followed 135 * by "zh_CN". Both RFC 1766 and Java styles are supported. 136 * @return The chosen locale, or null. 137 * @see MessageCatalog 138 */ 139 public Locale chooseLocale(String languages[]) 140 throws SAXException { 141 142 Locale l = messages.chooseLocale(languages); 143 144 if (l != null) { 145 setLocale(l); 146 } 147 return l; 148 } 149 150 /** 151 * Lets applications control entity resolution. 
*/
    public void setEntityResolver(EntityResolver r) {
        resolver = r;
    }

    /**
     * Returns the object used to resolve entities.
     */
    public EntityResolver getEntityResolver() {
        return resolver;
    }

    /**
     * Used by applications to set handling of DTD parsing events. A non-null
     * handler is immediately given a {@link Locator} that reports this
     * parser's current position.
     */
    public void setDtdHandler(DTDEventListener handler) {
        dtdHandler = handler;
        if (handler == null) {
            return;
        }

        // Every query is forwarded to the parser, so the locator always
        // reflects the position at the moment it is asked.
        Locator parserPosition = new Locator() {
            @Override
            public String getPublicId() {
                return DTDParser.this.getPublicId();
            }

            @Override
            public String getSystemId() {
                return DTDParser.this.getSystemId();
            }

            @Override
            public int getLineNumber() {
                return DTDParser.this.getLineNumber();
            }

            @Override
            public int getColumnNumber() {
                return DTDParser.this.getColumnNumber();
            }
        };
        handler.setDocumentLocator(parserPosition);
    }

    /**
     * Returns the handler used for DTD parsing events.
     */
    public DTDEventListener getDtdHandler() {
        return dtdHandler;
    }

    /**
     * Parses the DTD supplied by the given input source.
     */
    public void parse(InputSource in)
            throws IOException, SAXException {
        init();
        parseInternal(in);
    }

    /**
     * Parses the DTD found at the given URI, consulting the entity resolver
     * first.
     */
    public void parse(String uri)
            throws IOException, SAXException {
        init();

        InputSource source = resolver.resolveEntity(null, uri);
        if (source == null) {
            // The custom resolver punted resolution to the parser: handle it.
            source = Resolver.createInputSource(new java.net.URL(uri), false);
        } else if (source.getSystemId() == null) {
            // The custom resolver didn't correctly construct the input
            // entity: patch it up enough so relative URIs work, and issue a
            // warning to minimize later confusion.
            warning("P-065", null);
            source.setSystemId(uri);
        }

        parseInternal(source);
    }

    // makes sure the parser is reset to "before a document"
    private void init() {
        in = null;

        // alloc temporary data used in parsing
        strTmp = new StringBuffer();
        nameTmp = new char[20];
        nameCache = new NameCache();

        // reset doc info
        doLexicalPE = false;

        entities.clear();
        notations.clear();
        params.clear();
        declaredElements.clear();

        // initialize predefined references ... re-interpreted later
        String[][] predefined = {
            {"amp", "&"},
            {"lt", "<"},
            {"gt", ">"},
            {"quot", "\""},
            {"apos", "'"},
        };
        for (String[] pair : predefined) {
            builtin(pair[0], pair[1]);
        }

        // fall back to defaults for anything the application left unset
        if (locale == null) {
            locale = Locale.getDefault();
        }
        if (resolver == null) {
            resolver = new Resolver();
        }
        if (dtdHandler == null) {
            dtdHandler = new DTDHandlerBase();
        }
    }

    // registers one predefined internal entity under the given name
    private void builtin(String entityName, String entityValue) {
        entities.put(entityName,
                new InternalEntity(entityName, entityValue.toCharArray()));
    }

    ////////////////////////////////////////////////////////////////
    //
    // parsing is by recursive descent, code roughly
    // following the BNF rules except tweaked for simple
    // lookahead. rules are more or less in numeric order,
    // except where code sharing suggests other structures.
    //
    // a classic benefit of recursive descent parsers: it's
    // relatively easy to get diagnostics that make sense.
291 // 292 //////////////////////////////////////////////////////////////// 293 @SuppressWarnings("CallToThreadDumpStack") 294 private void parseInternal(InputSource input) 295 throws IOException, SAXException { 296 297 if (input == null) { 298 fatal("P-000"); 299 } 300 301 try { 302 in = InputEntity.getInputEntity(dtdHandler, locale); 303 in.init(input, null, null, false); 304 305 dtdHandler.startDTD(in); 306 307 // [30] extSubset ::= TextDecl? extSubsetDecl 308 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect 309 // | PEReference | S )* 310 // ... same as [79] extPE, which is where the code is 311 312 ExternalEntity externalSubset = new ExternalEntity(in); 313 externalParameterEntity(externalSubset); 314 315 if (!in.isEOF()) { 316 fatal("P-001", new Object[]{Integer.toHexString(((int) getc()))}); 317 } 318 afterRoot(); 319 dtdHandler.endDTD(); 320 321 } catch (EndOfInputException e) { 322 if (!in.isDocument()) { 323 String name = in.getName(); 324 do { // force a relevant URI and line number 325 in = in.pop(); 326 } while (in.isInternal()); 327 fatal("P-002", new Object[]{name}); 328 } else { 329 fatal("P-003", null); 330 } 331 } catch (RuntimeException e) { 332 LOGGER.log(Level.SEVERE, "Internal DTD parser error.", e); 333 throw new SAXParseException(e.getMessage() != null 334 ? e.getMessage() : e.getClass().getName(), 335 getPublicId(), getSystemId(), 336 getLineNumber(), getColumnNumber()); 337 338 } finally { 339 // recycle temporary data used during parsing 340 strTmp = null; 341 nameTmp = null; 342 nameCache = null; 343 344 // ditto input sources etc 345 if (in != null) { 346 in.close(); 347 in = null; 348 } 349 350 // get rid of all DTD info ... some of it would be 351 // useful for editors etc, investigate later. 
352 353 params.clear(); 354 entities.clear(); 355 notations.clear(); 356 declaredElements.clear(); 357 // elements.clear(); 358 ids.clear(); 359 } 360 } 361 362 void afterRoot() throws SAXException { 363 // Make sure all IDREFs match declared ID attributes. We scan 364 // after the document element is parsed, since XML allows forward 365 // references, and only now can we know if they're all resolved. 366 367 for (Enumeration e = ids.keys(); 368 e.hasMoreElements();) { 369 String id = (String) e.nextElement(); 370 Boolean value = (Boolean) ids.get(id); 371 if (Boolean.FALSE.equals(value)) { 372 error("V-024", new Object[]{id}); 373 } 374 } 375 } 376 377 // role is for diagnostics 378 private void whitespace(String roleId) 379 throws IOException, SAXException { 380 381 // [3] S ::= (#x20 | #x9 | #xd | #xa)+ 382 if (!maybeWhitespace()) { 383 fatal("P-004", new Object[]{messages.getMessage(locale, roleId)}); 384 } 385 } 386 387 // S? 388 private boolean maybeWhitespace() 389 throws IOException, SAXException { 390 391 if (!doLexicalPE) { 392 return in.maybeWhitespace(); 393 } 394 395 // see getc() for the PE logic -- this lets us splice 396 // expansions of PEs in "anywhere". getc() has smarts, 397 // so for external PEs we don't bypass it. 398 399 // XXX we can marginally speed PE handling, and certainly 400 // be cleaner (hence potentially more correct), by using 401 // the observations that expanded PEs only start and stop 402 // where whitespace is allowed. getc wouldn't need any 403 // "lexical" PE expansion logic, and no other method needs 404 // to handle termination of PEs. (parsing of literals would 405 // still need to pop entities, but not parsing of references 406 // in content.) 407 408 char c = getc(); 409 boolean saw = false; 410 411 while (c == ' ' || c == '\t' || c == '\n' || c == '\r') { 412 saw = true; 413 414 // this gracefully ends things when we stop playing 415 // with internal parameters. 
caller should have a 416 // grammar rule allowing whitespace at end of entity. 417 if (in.isEOF() && !in.isInternal()) { 418 return saw; 419 } 420 c = getc(); 421 } 422 ungetc(); 423 return saw; 424 } 425 426 private String maybeGetName() 427 throws IOException, SAXException { 428 429 NameCacheEntry entry = maybeGetNameCacheEntry(); 430 return (entry == null) ? null : entry.name; 431 } 432 433 private NameCacheEntry maybeGetNameCacheEntry() 434 throws IOException, SAXException { 435 436 // [5] Name ::= (Letter|'_'|':') (Namechar)* 437 char c = getc(); 438 439 if (!XmlChars.isLetter(c) && c != ':' && c != '_') { 440 ungetc(); 441 return null; 442 } 443 return nameCharString(c); 444 } 445 446 // Used when parsing enumerations 447 private String getNmtoken() 448 throws IOException, SAXException { 449 450 // [7] Nmtoken ::= (Namechar)+ 451 char c = getc(); 452 if (!XmlChars.isNameChar(c)) { 453 fatal("P-006", new Object[]{Character.valueOf(c)}); 454 } 455 return nameCharString(c).name; 456 } 457 458 // n.b. this gets used when parsing attribute values (for 459 // internal references) so we can't use strTmp; it's also 460 // a hotspot for CPU and memory in the parser (called at least 461 // once for each element) so this has been optimized a bit. 462 private NameCacheEntry nameCharString(char c) 463 throws IOException, SAXException { 464 465 int i = 1; 466 467 nameTmp[0] = c; 468 for (;;) { 469 if ((c = in.getNameChar()) == 0) { 470 break; 471 } 472 if (i >= nameTmp.length) { 473 char tmp[] = new char[nameTmp.length + 10]; 474 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length); 475 nameTmp = tmp; 476 } 477 nameTmp[i++] = c; 478 } 479 return nameCache.lookupEntry(nameTmp, i); 480 } 481 482 // 483 // much similarity between parsing entity values in DTD 484 // and attribute values (in DTD or content) ... both follow 485 // literal parsing rules, newline canonicalization, etc 486 // 487 // leaves value in 'strTmp' ... 
either a "replacement text" (4.5), 488 // or else partially normalized attribute value (the first bit 489 // of 3.3.3's spec, without the "if not CDATA" bits). 490 // 491 @SuppressWarnings("UnusedAssignment") 492 private void parseLiteral(boolean isEntityValue) 493 throws IOException, SAXException { 494 495 // [9] EntityValue ::= 496 // '"' ([^"&%] | Reference | PEReference)* '"' 497 // | "'" ([^'&%] | Reference | PEReference)* "'" 498 // [10] AttValue ::= 499 // '"' ([^"&] | Reference )* '"' 500 // | "'" ([^'&] | Reference )* "'" 501 char quote = getc(); 502 char c; 503 InputEntity source = in; 504 505 if (quote != '\'' && quote != '"') { 506 fatal("P-007"); 507 } 508 509 // don't report entity expansions within attributes, 510 // they're reported "fully expanded" via SAX 511 // isInAttribute = !isEntityValue; 512 513 // get value into strTmp 514 strTmp = new StringBuffer(); 515 516 // scan, allowing entity push/pop wherever ... 517 // expanded entities can't terminate the literal! 518 for (;;) { 519 if (in != source && in.isEOF()) { 520 // we don't report end of parsed entities 521 // within attributes (no SAX hooks) 522 in = in.pop(); 523 continue; 524 } 525 if ((c = getc()) == quote && in == source) { 526 break; 527 } 528 529 // 530 // Basically the "reference in attribute value" 531 // row of the chart in section 4.4 of the spec 532 // 533 if (c == '&') { 534 String entityName = maybeGetName(); 535 536 if (entityName != null) { 537 nextChar(';', "F-020", entityName); 538 539 // 4.4 says: bypass these here ... 
we'll catch 540 // forbidden refs to unparsed entities on use 541 if (isEntityValue) { 542 strTmp.append('&'); 543 strTmp.append(entityName); 544 strTmp.append(';'); 545 continue; 546 } 547 expandEntityInLiteral(entityName, entities, isEntityValue); 548 549 // character references are always included immediately 550 } else if ((getc()) == '#') { 551 int tmp = parseCharNumber(); 552 553 if (tmp > 0xffff) { 554 tmp = surrogatesToCharTmp(tmp); 555 strTmp.append(charTmp[0]); 556 if (tmp == 2) { 557 strTmp.append(charTmp[1]); 558 } 559 } else { 560 strTmp.append((char) tmp); 561 } 562 } else { 563 fatal("P-009"); 564 } 565 continue; 566 567 } 568 569 // expand parameter entities only within entity value literals 570 if (c == '%' && isEntityValue) { 571 String entityName = maybeGetName(); 572 573 if (entityName != null) { 574 nextChar(';', "F-021", entityName); 575 expandEntityInLiteral(entityName, params, isEntityValue); 576 continue; 577 } else { 578 fatal("P-011"); 579 } 580 } 581 582 // For attribute values ... 583 if (!isEntityValue) { 584 // 3.3.3 says whitespace normalizes to space... 585 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { 586 strTmp.append(' '); 587 continue; 588 } 589 590 // "<" not legal in parsed literals ... 591 if (c == '<') { 592 fatal("P-012"); 593 } 594 } 595 596 strTmp.append(c); 597 } 598 // isInAttribute = false; 599 } 600 601 // does a SINGLE expansion of the entity (often reparsed later) 602 private void expandEntityInLiteral(String name, SimpleHashtable table, 603 boolean isEntityValue) 604 throws IOException, SAXException { 605 606 Object entity = table.get(name); 607 608 if (entity instanceof InternalEntity) { 609 InternalEntity value = (InternalEntity) entity; 610 pushReader(value.buf, name, !value.isPE); 611 612 } else if (entity instanceof ExternalEntity) { 613 if (!isEntityValue) // must be a PE ... 614 { 615 fatal("P-013", new Object[]{name}); 616 } 617 // XXX if this returns false ... 
618 pushReader((ExternalEntity) entity); 619 620 } else if (entity == null) { 621 // 622 // Note: much confusion about whether spec requires such 623 // errors to be fatal in many cases, but none about whether 624 // it allows "normal" errors to be unrecoverable! 625 // 626 fatal((table == params) ? "V-022" : "P-014", 627 new Object[]{name}); 628 } 629 } 630 631 // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 632 // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>' 633 // NOTE: XML spec should explicitly say that PE ref syntax is 634 // ignored in PIs, comments, SystemLiterals, and Pubid Literal 635 // values ... can't process the XML spec's own DTD without doing 636 // that for comments. 637 private String getQuotedString(String type, String extra) 638 throws IOException, SAXException { 639 640 // use in.getc to bypass PE processing 641 char quote = in.getc(); 642 643 if (quote != '\'' && quote != '"') { 644 fatal("P-015", new Object[]{ 645 messages.getMessage(locale, type, new Object[]{extra}) 646 }); 647 } 648 649 char c; 650 651 strTmp = new StringBuffer(); 652 while ((c = in.getc()) != quote) { 653 strTmp.append((char) c); 654 } 655 return strTmp.toString(); 656 } 657 658 private String parsePublicId() throws IOException, SAXException { 659 660 // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'") 661 // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%] 662 String retval = getQuotedString("F-033", null); 663 for (int i = 0; i < retval.length(); i++) { 664 char c = retval.charAt(i); 665 if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1 666 && !(c >= 'A' && c <= 'Z') 667 && !(c >= 'a' && c <= 'z')) { 668 fatal("P-016", new Object[]{Character.valueOf(c)}); 669 } 670 } 671 strTmp = new StringBuffer(); 672 strTmp.append(retval); 673 return normalize(false); 674 } 675 676 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 677 // handled by: InputEntity.parsedContent() 678 private boolean 
maybeComment(boolean skipStart) 679 throws IOException, SAXException { 680 681 // [15] Comment ::= '<!--' 682 // ( (Char - '-') | ('-' (Char - '-'))* 683 // '-->' 684 if (!in.peek(skipStart ? "!--" : "<!--", null)) { 685 return false; 686 } 687 688 boolean savedLexicalPE = doLexicalPE; 689 boolean saveCommentText; 690 691 doLexicalPE = false; 692 saveCommentText = false; 693 if (saveCommentText) { 694 strTmp = new StringBuffer(); 695 } 696 697 oneComment: 698 for (;;) { 699 try { 700 // bypass PE expansion, but permit PEs 701 // to complete ... valid docs won't care. 702 for (;;) { 703 int c = getc(); 704 if (c == '-') { 705 c = getc(); 706 if (c != '-') { 707 if (saveCommentText) { 708 strTmp.append('-'); 709 } 710 ungetc(); 711 continue; 712 } 713 nextChar('>', "F-022", null); 714 break oneComment; 715 } 716 if (saveCommentText) { 717 strTmp.append((char) c); 718 } 719 } 720 } catch (EndOfInputException e) { 721 // 722 // This is fatal EXCEPT when we're processing a PE... 723 // in which case a validating processor reports an error. 724 // External PEs are easy to detect; internal ones we 725 // infer by being an internal entity outside an element. 726 // 727 if (in.isInternal()) { 728 error("V-021", null); 729 } 730 fatal("P-017"); 731 } 732 } 733 doLexicalPE = savedLexicalPE; 734 if (saveCommentText) { 735 dtdHandler.comment(strTmp.toString()); 736 } 737 return true; 738 } 739 740 private boolean maybePI(boolean skipStart) 741 throws IOException, SAXException { 742 743 // [16] PI ::= '<?' PITarget 744 // (S (Char* - (Char* '?>' Char*)))? 745 // '?>' 746 // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l') 747 boolean savedLexicalPE = doLexicalPE; 748 749 if (!in.peek(skipStart ? "?" 
: "<?", null)) { 750 return false; 751 } 752 doLexicalPE = false; 753 754 String target = maybeGetName(); 755 756 if (target == null) { 757 fatal("P-018"); 758 } 759 if ("xml".equals(target)) { 760 fatal("P-019"); 761 } 762 if ("xml".equalsIgnoreCase(target)) { 763 fatal("P-020", new Object[]{target}); 764 } 765 766 if (maybeWhitespace()) { 767 strTmp = new StringBuffer(); 768 try { 769 for (;;) { 770 // use in.getc to bypass PE processing 771 char c = in.getc(); 772 //Reached the end of PI. 773 if (c == '?' && in.peekc('>')) { 774 break; 775 } 776 strTmp.append(c); 777 } 778 } catch (EndOfInputException e) { 779 fatal("P-021"); 780 } 781 dtdHandler.processingInstruction(target, strTmp.toString()); 782 } else { 783 if (!in.peek("?>", null)) { 784 fatal("P-022"); 785 } 786 dtdHandler.processingInstruction(target, ""); 787 } 788 789 doLexicalPE = savedLexicalPE; 790 return true; 791 } 792 793 // [18] CDSect ::= CDStart CData CDEnd 794 // [19] CDStart ::= '<![CDATA[' 795 // [20] CData ::= (Char* - (Char* ']]>' Char*)) 796 // [21] CDEnd ::= ']]>' 797 // 798 // ... handled by InputEntity.unparsedContent() 799 // collapsing several rules together ... 800 // simpler than attribute literals -- no reference parsing! 801 private String maybeReadAttribute(String name, boolean must) 802 throws IOException, SAXException { 803 804 // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\" 805 // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\" 806 // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\" 807 if (!maybeWhitespace()) { 808 if (!must) { 809 return null; 810 } 811 fatal("P-024", new Object[]{name}); 812 // NOTREACHED 813 } 814 815 if (!peek(name)) { 816 if (must) { 817 fatal("P-024", new Object[]{name}); 818 } else { 819 // To ensure that the whitespace is there so that when we 820 // check for the next attribute we assure that the 821 // whitespace still exists. 822 ungetc(); 823 return null; 824 } 825 } 826 827 // [25] Eq ::= S? '=' S? 
828 maybeWhitespace(); 829 nextChar('=', "F-023", null); 830 maybeWhitespace(); 831 832 return getQuotedString("F-035", name); 833 } 834 835 private void readVersion(boolean must, String versionNum) 836 throws IOException, SAXException { 837 838 String value = maybeReadAttribute("version", must); 839 840 // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+ 841 842 if (must && value == null) { 843 fatal("P-025", new Object[]{versionNum}); 844 } 845 if (value != null) { 846 int length = value.length(); 847 for (int i = 0; i < length; i++) { 848 char c = value.charAt(i); 849 if (!((c >= '0' && c <= '9') 850 || c == '_' || c == '.' 851 || (c >= 'a' && c <= 'z') 852 || (c >= 'A' && c <= 'Z') 853 || c == ':' || c == '-')) { 854 fatal("P-026", new Object[]{value}); 855 } 856 } 857 } 858 if (value != null && !value.equals(versionNum)) { 859 error("P-027", new Object[]{versionNum, value}); 860 } 861 } 862 863 // common code used by most markup declarations 864 // ... S (Q)Name ... 865 private String getMarkupDeclname(String roleId, boolean qname) 866 throws IOException, SAXException { 867 868 String name; 869 870 whitespace(roleId); 871 name = maybeGetName(); 872 if (name == null) { 873 fatal("P-005", new Object[]{messages.getMessage(locale, roleId)}); 874 } 875 return name; 876 } 877 878 private boolean maybeMarkupDecl() 879 throws IOException, SAXException { 880 881 // [29] markupdecl ::= elementdecl | Attlistdecl 882 // | EntityDecl | NotationDecl | PI | Comment 883 return maybeElementDecl() 884 || maybeAttlistDecl() 885 || maybeEntityDecl() 886 || maybeNotationDecl() 887 || maybePI(false) 888 || maybeComment(false); 889 } 890 private static final String XmlLang = "xml:lang"; 891 892 private boolean isXmlLang(String value) { 893 894 // [33] LanguageId ::= Langcode ('-' Subcode)* 895 // [34] Langcode ::= ISO639Code | IanaCode | UserCode 896 // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z] 897 // [36] IanaCode ::= [iI] '-' SubCode 898 // [37] UserCode ::= [xX] '-' SubCode 899 // [38] 
SubCode ::= [a-zA-Z]+ 900 901 // the ISO and IANA codes (and subcodes) are registered, 902 // but that's neither a WF nor a validity constraint. 903 904 int nextSuffix; 905 char c; 906 907 if (value.length() < 2) { 908 return false; 909 } 910 c = value.charAt(1); 911 if (c == '-') { // IANA, or user, code 912 c = value.charAt(0); 913 if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X')) { 914 return false; 915 } 916 nextSuffix = 1; 917 } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { 918 // 2 letter ISO code, or error 919 c = value.charAt(0); 920 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) { 921 return false; 922 } 923 nextSuffix = 2; 924 } else { 925 return false; 926 } 927 928 // here "suffix" ::= '-' [a-zA-Z]+ suffix* 929 while (nextSuffix < value.length()) { 930 c = value.charAt(nextSuffix); 931 if (c != '-') { 932 break; 933 } 934 while (++nextSuffix < value.length()) { 935 c = value.charAt(nextSuffix); 936 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) { 937 break; 938 } 939 } 940 } 941 return value.length() == nextSuffix && c != '-'; 942 } 943 944 // 945 // CHAPTER 3: Logical Structures 946 // 947 /** 948 * To validate, subclassers should at this time make sure that values are of 949 * the declared types:<UL> <LI> ID and IDREF(S) values are Names <LI> 950 * NMTOKEN(S) are Nmtokens <LI> ENUMERATION values match one of the tokens 951 * <LI> NOTATION values match a notation name <LI> ENTITIY(IES) values match 952 * an unparsed external entity </UL> 953 * <p> 954 * <P> Separately, make sure IDREF values match some ID provided in the 955 * document (in the afterRoot method). 956 */ 957 /* void validateAttributeSyntax (Attribute attr, String value) 958 throws DTDParseException { 959 // ID, IDREF(S) ... 
values are Names 960 if (Attribute.ID == attr.type()) { 961 if (!XmlNames.isName (value)) 962 error ("V-025", new Object [] { value }); 963 964 Boolean b = (Boolean) ids.getNonInterned (value); 965 if (b == null || b.equals (Boolean.FALSE)) 966 ids.put (value.intern (), Boolean.TRUE); 967 else 968 error ("V-026", new Object [] { value }); 969 970 } else if (Attribute.IDREF == attr.type()) { 971 if (!XmlNames.isName (value)) 972 error ("V-027", new Object [] { value }); 973 974 Boolean b = (Boolean) ids.getNonInterned (value); 975 if (b == null) 976 ids.put (value.intern (), Boolean.FALSE); 977 978 } else if (Attribute.IDREFS == attr.type()) { 979 StringTokenizer tokenizer = new StringTokenizer (value); 980 Boolean b; 981 boolean sawValue = false; 982 983 while (tokenizer.hasMoreTokens ()) { 984 value = tokenizer.nextToken (); 985 if (!XmlNames.isName (value)) 986 error ("V-027", new Object [] { value }); 987 b = (Boolean) ids.getNonInterned (value); 988 if (b == null) 989 ids.put (value.intern (), Boolean.FALSE); 990 sawValue = true; 991 } 992 if (!sawValue) 993 error ("V-039", null); 994 995 996 // NMTOKEN(S) ... values are Nmtoken(s) 997 } else if (Attribute.NMTOKEN == attr.type()) { 998 if (!XmlNames.isNmtoken (value)) 999 error ("V-028", new Object [] { value }); 1000 1001 } else if (Attribute.NMTOKENS == attr.type()) { 1002 StringTokenizer tokenizer = new StringTokenizer (value); 1003 boolean sawValue = false; 1004 1005 while (tokenizer.hasMoreTokens ()) { 1006 value = tokenizer.nextToken (); 1007 if (!XmlNames.isNmtoken (value)) 1008 error ("V-028", new Object [] { value }); 1009 sawValue = true; 1010 } 1011 if (!sawValue) 1012 error ("V-032", null); 1013 1014 // ENUMERATION ... 
values match one of the tokens 1015 } else if (Attribute.ENUMERATION == attr.type()) { 1016 for (int i = 0; i < attr.values().length; i++) 1017 if (value.equals (attr.values()[i])) 1018 return; 1019 error ("V-029", new Object [] { value }); 1020 1021 // NOTATION values match a notation name 1022 } else if (Attribute.NOTATION == attr.type()) { 1023 // 1024 // XXX XML 1.0 spec should probably list references to 1025 // externally defined notations in standalone docs as 1026 // validity errors. Ditto externally defined unparsed 1027 // entities; neither should show up in attributes, else 1028 // one needs to read the external declarations in order 1029 // to make sense of the document (exactly what tagging 1030 // a doc as "standalone" intends you won't need to do). 1031 // 1032 for (int i = 0; i < attr.values().length; i++) 1033 if (value.equals (attr.values()[i])) 1034 return; 1035 error ("V-030", new Object [] { value }); 1036 1037 // ENTITY(IES) values match an unparsed entity(ies) 1038 } else if (Attribute.ENTITY == attr.type()) { 1039 // see note above re standalone 1040 if (!isUnparsedEntity (value)) 1041 error ("V-031", new Object [] { value }); 1042 1043 } else if (Attribute.ENTITIES == attr.type()) { 1044 StringTokenizer tokenizer = new StringTokenizer (value); 1045 boolean sawValue = false; 1046 1047 while (tokenizer.hasMoreTokens ()) { 1048 value = tokenizer.nextToken (); 1049 // see note above re standalone 1050 if (!isUnparsedEntity (value)) 1051 error ("V-031", new Object [] { value }); 1052 sawValue = true; 1053 } 1054 if (!sawValue) 1055 error ("V-040", null); 1056 1057 } else if (Attribute.CDATA != attr.type()) 1058 throw new InternalError (attr.type()); 1059 } 1060 */ 1061 /* 1062 private boolean isUnparsedEntity (String name) 1063 { 1064 Object e = entities.getNonInterned (name); 1065 if (e == null || !(e instanceof ExternalEntity)) 1066 return false; 1067 return ((ExternalEntity)e).notation != null; 1068 } 1069 */ 1070 private boolean 
maybeElementDecl() 1071 throws IOException, SAXException { 1072 1073 // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>' 1074 // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 1075 InputEntity start = peekDeclaration("!ELEMENT"); 1076 1077 if (start == null) { 1078 return false; 1079 } 1080 1081 // n.b. for content models where inter-element whitespace is 1082 // ignorable, we mark that fact here. 1083 String name = getMarkupDeclname("F-015", true); 1084 // Element element = (Element) elements.get (name); 1085 // boolean declEffective = false; 1086 1087 /* 1088 if (element != null) { 1089 if (element.contentModel() != null) { 1090 error ("V-012", new Object [] { name }); 1091 } // else <!ATTLIST name ...> came first 1092 } else { 1093 element = new Element(name); 1094 elements.put (element.name(), element); 1095 declEffective = true; 1096 } 1097 */ 1098 if (declaredElements.contains(name)) { 1099 error("V-012", new Object[]{name}); 1100 } else { 1101 declaredElements.add(name); 1102 // declEffective = true; 1103 } 1104 1105 short modelType; 1106 whitespace("F-000"); 1107 if (peek(strEMPTY)) { 1108 /// // leave element.contentModel as null for this case. 
1109 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY); 1110 } else if (peek(strANY)) { 1111 /// element.setContentModel(new StringModel(StringModelType.ANY)); 1112 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY); 1113 } else { 1114 modelType = getMixedOrChildren(name); 1115 } 1116 1117 dtdHandler.endContentModel(name, modelType); 1118 1119 maybeWhitespace(); 1120 char c = getc(); 1121 if (c != '>') { 1122 fatal("P-036", new Object[]{name, Character.valueOf(c)}); 1123 } 1124 if (start != in) { 1125 error("V-013", null); 1126 } 1127 1128 /// dtdHandler.elementDecl(element); 1129 1130 return true; 1131 } 1132 1133 // We're leaving the content model as a regular expression; 1134 // it's an efficient natural way to express such things, and 1135 // libraries often interpret them. No whitespace in the 1136 // model we store, though! 1137 /** 1138 * returns content model type. 1139 */ 1140 private short getMixedOrChildren(String elementName/*Element element*/) 1141 throws IOException, SAXException { 1142 1143 InputEntity start; 1144 1145 // [47] children ::= (choice|seq) ('?'|'*'|'+')? 1146 strTmp = new StringBuffer(); 1147 1148 nextChar('(', "F-028", elementName); 1149 start = in; 1150 maybeWhitespace(); 1151 strTmp.append('('); 1152 1153 short modelType; 1154 if (peek("#PCDATA")) { 1155 strTmp.append("#PCDATA"); 1156 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED); 1157 getMixed(elementName, start); 1158 } else { 1159 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN); 1160 getcps(elementName, start); 1161 } 1162 1163 return modelType; 1164 } 1165 1166 // '(' S? 
already consumed 1167 // matching ')' must be in "start" entity if validating 1168 private void getcps(/*Element element,*/String elementName, InputEntity start) 1169 throws IOException, SAXException { 1170 1171 // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')? 1172 // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')' 1173 // [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')' 1174 boolean decided = false; 1175 char type = 0; 1176 // ContentModel retval, temp, current; 1177 1178 // retval = temp = current = null; 1179 1180 dtdHandler.startModelGroup(); 1181 1182 do { 1183 String tag; 1184 1185 tag = maybeGetName(); 1186 if (tag != null) { 1187 strTmp.append(tag); 1188 // temp = new ElementModel(tag); 1189 // getFrequency((RepeatableContent)temp); 1190 ///-> 1191 dtdHandler.childElement(tag, getFrequency()); 1192 ///<- 1193 } else if (peek("(")) { 1194 InputEntity next = in; 1195 strTmp.append('('); 1196 maybeWhitespace(); 1197 // temp = getcps(element, next); 1198 // getFrequency(temp); 1199 ///-> 1200 getcps(elementName, next); 1201 /// getFrequency(); <- this looks like a bug 1202 ///<- 1203 } else { 1204 fatal((type == 0) ? "P-039" 1205 : ((type == ',') ? "P-037" : "P-038"), 1206 new Object[]{Character.valueOf(getc())}); 1207 } 1208 1209 maybeWhitespace(); 1210 if (decided) { 1211 char c = getc(); 1212 1213 // if (current != null) { 1214 // current.addChild(temp); 1215 // } 1216 if (c == type) { 1217 strTmp.append(type); 1218 maybeWhitespace(); 1219 reportConnector(type); 1220 continue; 1221 } else if (c == '\u0029') { // rparen 1222 ungetc(); 1223 continue; 1224 } else { 1225 fatal((type == 0) ? 
"P-041" : "P-040", 1226 new Object[]{ 1227 Character.valueOf(c), 1228 Character.valueOf(type) 1229 }); 1230 } 1231 } else { 1232 type = getc(); 1233 switch (type) { 1234 case '|': 1235 case ',': 1236 reportConnector(type); 1237 break; 1238 default: 1239 // retval = temp; 1240 ungetc(); 1241 continue; 1242 } 1243 // retval = (ContentModel)current; 1244 decided = true; 1245 // current.addChild(temp); 1246 strTmp.append(type); 1247 } 1248 maybeWhitespace(); 1249 } while (!peek(")")); 1250 1251 if (in != start) { 1252 error("V-014", new Object[]{elementName}); 1253 } 1254 strTmp.append(')'); 1255 1256 dtdHandler.endModelGroup(getFrequency()); 1257 // return retval; 1258 } 1259 1260 private void reportConnector(char type) throws SAXException { 1261 switch (type) { 1262 case '|': 1263 dtdHandler.connector(DTDEventListener.CHOICE); ///<- 1264 return; 1265 case ',': 1266 dtdHandler.connector(DTDEventListener.SEQUENCE); ///<- 1267 return; 1268 default: 1269 throw new Error(); //assertion failed. 1270 } 1271 } 1272 1273 private short getFrequency() 1274 throws IOException, SAXException { 1275 1276 final char c = getc(); 1277 1278 if (c == '?') { 1279 strTmp.append(c); 1280 return DTDEventListener.OCCURENCE_ZERO_OR_ONE; 1281 // original.setRepeat(Repeat.ZERO_OR_ONE); 1282 } else if (c == '+') { 1283 strTmp.append(c); 1284 return DTDEventListener.OCCURENCE_ONE_OR_MORE; 1285 // original.setRepeat(Repeat.ONE_OR_MORE); 1286 } else if (c == '*') { 1287 strTmp.append(c); 1288 return DTDEventListener.OCCURENCE_ZERO_OR_MORE; 1289 // original.setRepeat(Repeat.ZERO_OR_MORE); 1290 } else { 1291 ungetc(); 1292 return DTDEventListener.OCCURENCE_ONCE; 1293 } 1294 } 1295 1296 // '(' S? '#PCDATA' already consumed 1297 // matching ')' must be in "start" entity if validating 1298 private void getMixed(String elementName, /*Element element,*/ InputEntity start) 1299 throws IOException, SAXException { 1300 1301 // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' 1302 // | '(' S? 
'#PCDATA' S? ')' 1303 maybeWhitespace(); 1304 if (peek("\u0029*") || peek("\u0029")) { 1305 if (in != start) { 1306 error("V-014", new Object[]{elementName}); 1307 } 1308 strTmp.append(')'); 1309 // element.setContentModel(new StringModel(StringModelType.PCDATA)); 1310 return; 1311 } 1312 1313 ArrayList l = new ArrayList(); 1314 // l.add(new StringModel(StringModelType.PCDATA)); 1315 1316 1317 while (peek("|")) { 1318 String name; 1319 1320 strTmp.append('|'); 1321 maybeWhitespace(); 1322 1323 doLexicalPE = true; 1324 name = maybeGetName(); 1325 if (name == null) { 1326 fatal("P-042", new Object[]{elementName, Integer.toHexString(getc())}); 1327 } 1328 if (l.contains(name)) { 1329 error("V-015", new Object[]{name}); 1330 } else { 1331 l.add(name); 1332 dtdHandler.mixedElement(name); 1333 } 1334 strTmp.append(name); 1335 maybeWhitespace(); 1336 } 1337 1338 if (!peek("\u0029*")) // right paren 1339 { 1340 fatal("P-043", new Object[]{elementName, Character.valueOf(getc())}); 1341 } 1342 if (in != start) { 1343 error("V-014", new Object[]{elementName}); 1344 } 1345 strTmp.append(')'); 1346 // ChoiceModel cm = new ChoiceModel((Collection)l); 1347 // cm.setRepeat(Repeat.ZERO_OR_MORE); 1348 // element.setContentModel(cm); 1349 } 1350 1351 private boolean maybeAttlistDecl() 1352 throws IOException, SAXException { 1353 1354 // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 1355 InputEntity start = peekDeclaration("!ATTLIST"); 1356 1357 if (start == null) { 1358 return false; 1359 } 1360 1361 String elementName = getMarkupDeclname("F-016", true); 1362 // Element element = (Element) elements.get (name); 1363 1364 // if (element == null) { 1365 // // not yet declared -- no problem. 
1366 // element = new Element(name); 1367 // elements.put(name, element); 1368 // } 1369 1370 while (!peek(">")) { 1371 1372 // [53] AttDef ::= S Name S AttType S DefaultDecl 1373 // [54] AttType ::= StringType | TokenizedType | EnumeratedType 1374 1375 // look for global attribute definitions, don't expand for now... 1376 maybeWhitespace(); 1377 char c = getc(); 1378 if (c == '%') { 1379 String entityName = maybeGetName(); 1380 if (entityName != null) { 1381 nextChar(';', "F-021", entityName); 1382 whitespace("F-021"); 1383 continue; 1384 } else { 1385 fatal("P-011"); 1386 } 1387 } 1388 1389 ungetc(); 1390 // look for attribute name otherwise 1391 String attName = maybeGetName(); 1392 if (attName == null) { 1393 fatal("P-044", new Object[]{Character.valueOf(getc())}); 1394 } 1395 whitespace("F-001"); 1396 1397 /// Attribute a = new Attribute (name); 1398 1399 String typeName; 1400 Vector values = null; // notation/enumeration values 1401 1402 // Note: use the type constants from Attribute 1403 // so that "==" may be used (faster) 1404 1405 // [55] StringType ::= 'CDATA' 1406 if (peek(TYPE_CDATA)) /// a.setType(Attribute.CDATA); 1407 { 1408 typeName = TYPE_CDATA; 1409 } // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' 1410 // | 'ENTITY' | 'ENTITIES' 1411 // | 'NMTOKEN' | 'NMTOKENS' 1412 // n.b. if "IDREFS" is there, both "ID" and "IDREF" 1413 // match peekahead ... so this order matters! 1414 else if (peek(TYPE_IDREFS)) { 1415 typeName = TYPE_IDREFS; 1416 } else if (peek(TYPE_IDREF)) { 1417 typeName = TYPE_IDREF; 1418 } else if (peek(TYPE_ID)) { 1419 typeName = TYPE_ID; 1420 // TODO: should implement this error check? 
1421 /// if (element.id() != null) { 1422 /// error ("V-016", new Object [] { element.id() }); 1423 /// } else 1424 /// element.setId(name); 1425 } else if (peek(TYPE_ENTITY)) { 1426 typeName = TYPE_ENTITY; 1427 } else if (peek(TYPE_ENTITIES)) { 1428 typeName = TYPE_ENTITIES; 1429 } else if (peek(TYPE_NMTOKENS)) { 1430 typeName = TYPE_NMTOKENS; 1431 } else if (peek(TYPE_NMTOKEN)) { 1432 typeName = TYPE_NMTOKEN; 1433 } // [57] EnumeratedType ::= NotationType | Enumeration 1434 // [58] NotationType ::= 'NOTATION' S '(' S? Name 1435 // (S? '|' S? Name)* S? ')' 1436 else if (peek(TYPE_NOTATION)) { 1437 typeName = TYPE_NOTATION; 1438 whitespace("F-002"); 1439 nextChar('(', "F-029", null); 1440 maybeWhitespace(); 1441 1442 values = new Vector(); 1443 do { 1444 String name; 1445 if ((name = maybeGetName()) == null) { 1446 fatal("P-068"); 1447 } 1448 // permit deferred declarations 1449 if (notations.get(name) == null) { 1450 notations.put(name, name); 1451 } 1452 values.addElement(name); 1453 maybeWhitespace(); 1454 if (peek("|")) { 1455 maybeWhitespace(); 1456 } 1457 } while (!peek(")")); 1458 /// a.setValues(new String [v.size ()]); 1459 /// for (int i = 0; i < v.size (); i++) 1460 /// a.setValue(i, (String)v.elementAt(i)); 1461 1462 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? 
')' 1463 } else if (peek("(")) { 1464 /// a.setType(Attribute.ENUMERATION); 1465 typeName = TYPE_ENUMERATION; 1466 1467 maybeWhitespace(); 1468 1469 /// Vector v = new Vector (); 1470 values = new Vector(); 1471 do { 1472 String name = getNmtoken(); 1473 /// v.addElement (name); 1474 values.addElement(name); 1475 maybeWhitespace(); 1476 if (peek("|")) { 1477 maybeWhitespace(); 1478 } 1479 } while (!peek(")")); 1480 /// a.setValues(new String [v.size ()]); 1481 /// for (int i = 0; i < v.size (); i++) 1482 /// a.setValue(i, (String)v.elementAt(i)); 1483 } else { 1484 fatal("P-045", 1485 new Object[]{attName, Character.valueOf(getc())}); 1486 typeName = null; 1487 } 1488 1489 short attributeUse; 1490 String defaultValue = null; 1491 1492 // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' 1493 // | (('#FIXED' S)? AttValue) 1494 whitespace("F-003"); 1495 if (peek("#REQUIRED")) { 1496 attributeUse = DTDEventListener.USE_REQUIRED; 1497 } /// a.setIsRequired(true); 1498 else if (peek("#FIXED")) { 1499 /// if (a.type() == Attribute.ID) 1500 if (typeName == TYPE_ID) { 1501 error("V-017", new Object[]{attName}); 1502 } 1503 /// a.setIsFixed(true); 1504 attributeUse = DTDEventListener.USE_FIXED; 1505 whitespace("F-004"); 1506 parseLiteral(false); 1507 /// if (a.type() != Attribute.CDATA) 1508 /// a.setDefaultValue(normalize(false)); 1509 /// else 1510 /// a.setDefaultValue(strTmp.toString()); 1511 1512 if (typeName == TYPE_CDATA) { 1513 defaultValue = normalize(false); 1514 } else { 1515 defaultValue = strTmp.toString(); 1516 } 1517 1518 // TODO: implement this check 1519 /// if (a.type() != Attribute.CDATA) 1520 /// validateAttributeSyntax (a, a.defaultValue()); 1521 } else if (!peek("#IMPLIED")) { 1522 attributeUse = DTDEventListener.USE_IMPLIED; 1523 1524 /// if (a.type() == Attribute.ID) 1525 if (typeName == TYPE_ID) { 1526 error("V-018", new Object[]{attName}); 1527 } 1528 parseLiteral(false); 1529 /// if (a.type() != Attribute.CDATA) 1530 /// 
a.setDefaultValue(normalize(false)); 1531 /// else 1532 /// a.setDefaultValue(strTmp.toString()); 1533 if (typeName == TYPE_CDATA) { 1534 defaultValue = normalize(false); 1535 } else { 1536 defaultValue = strTmp.toString(); 1537 } 1538 1539 // TODO: implement this check 1540 /// if (a.type() != Attribute.CDATA) 1541 /// validateAttributeSyntax (a, a.defaultValue()); 1542 } else { 1543 // TODO: this looks like an fatal error. 1544 attributeUse = DTDEventListener.USE_NORMAL; 1545 } 1546 1547 if (XmlLang.equals(attName) 1548 && defaultValue/* a.defaultValue()*/ != null 1549 && !isXmlLang(defaultValue/*a.defaultValue()*/)) { 1550 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/}); 1551 } 1552 1553 // TODO: isn't it an error to specify the same attribute twice? 1554 /// if (!element.attributes().contains(a)) { 1555 /// element.addAttribute(a); 1556 /// dtdHandler.attributeDecl(a); 1557 /// } 1558 1559 String[] v = (values != null) ? (String[]) values.toArray(new String[values.size()]) : null; 1560 dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue); 1561 maybeWhitespace(); 1562 } 1563 if (start != in) { 1564 error("V-013", null); 1565 } 1566 return true; 1567 } 1568 1569 // used when parsing literal attribute values, 1570 // or public identifiers. 1571 // 1572 // input in strTmp 1573 private String normalize(boolean invalidIfNeeded) { 1574 1575 // this can allocate an extra string... 
1576 1577 String s = strTmp.toString(); 1578 String s2 = s.trim(); 1579 boolean didStrip = false; 1580 1581 if (s != s2) { 1582 s = s2; 1583 didStrip = true; 1584 } 1585 strTmp = new StringBuffer(); 1586 for (int i = 0; i < s.length(); i++) { 1587 char c = s.charAt(i); 1588 if (!XmlChars.isSpace(c)) { 1589 strTmp.append(c); 1590 continue; 1591 } 1592 strTmp.append(' '); 1593 while (++i < s.length() && XmlChars.isSpace(s.charAt(i))) { 1594 didStrip = true; 1595 } 1596 i--; 1597 } 1598 if (didStrip) { 1599 return strTmp.toString(); 1600 } else { 1601 return s; 1602 } 1603 } 1604 1605 private boolean maybeConditionalSect() 1606 throws IOException, SAXException { 1607 1608 // [61] conditionalSect ::= includeSect | ignoreSect 1609 1610 if (!peek("<![")) { 1611 return false; 1612 } 1613 1614 String keyword; 1615 InputEntity start = in; 1616 1617 maybeWhitespace(); 1618 1619 if ((keyword = maybeGetName()) == null) { 1620 fatal("P-046"); 1621 } 1622 maybeWhitespace(); 1623 nextChar('[', "F-030", null); 1624 1625 // [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' 1626 // extSubsetDecl ']]>' 1627 if ("INCLUDE".equals(keyword)) { 1628 for (;;) { 1629 while (in.isEOF() && in != start) { 1630 in = in.pop(); 1631 } 1632 if (in.isEOF()) { 1633 error("V-020", null); 1634 } 1635 if (peek("]]>")) { 1636 break; 1637 } 1638 1639 doLexicalPE = false; 1640 if (maybeWhitespace()) { 1641 continue; 1642 } 1643 if (maybePEReference()) { 1644 continue; 1645 } 1646 doLexicalPE = true; 1647 if (maybeMarkupDecl() || maybeConditionalSect()) { 1648 continue; 1649 } 1650 1651 fatal("P-047"); 1652 } 1653 1654 // [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' 1655 // ignoreSectcontents ']]>' 1656 // [64] ignoreSectcontents ::= Ignore ('<![' 1657 // ignoreSectcontents ']]>' Ignore)* 1658 // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 1659 } else if ("IGNORE".equals(keyword)) { 1660 int nestlevel = 1; 1661 // ignoreSectcontents 1662 doLexicalPE = false; 1663 while (nestlevel > 0) { 1664 char c = getc(); // will pop input entities 1665 if (c == '<') { 1666 if (peek("![")) { 1667 nestlevel++; 1668 } 1669 } else if (c == ']') { 1670 if (peek("]>")) { 1671 nestlevel--; 1672 } 1673 } else { 1674 continue; 1675 } 1676 } 1677 } else { 1678 fatal("P-048", new Object[]{keyword}); 1679 } 1680 return true; 1681 } 1682 1683 // 1684 // CHAPTER 4: Physical Structures 1685 // 1686 // parse decimal or hex numeric character reference 1687 private int parseCharNumber() 1688 throws IOException, SAXException { 1689 1690 char c; 1691 int retval = 0; 1692 1693 // n.b. we ignore overflow ... 1694 if (getc() != 'x') { 1695 ungetc(); 1696 for (;;) { 1697 c = getc(); 1698 if (c >= '0' && c <= '9') { 1699 retval *= 10; 1700 retval += (c - '0'); 1701 continue; 1702 } 1703 if (c == ';') { 1704 return retval; 1705 } 1706 fatal("P-049"); 1707 } 1708 } else { 1709 for (;;) { 1710 c = getc(); 1711 if (c >= '0' && c <= '9') { 1712 retval <<= 4; 1713 retval += (c - '0'); 1714 continue; 1715 } 1716 if (c >= 'a' && c <= 'f') { 1717 retval <<= 4; 1718 retval += 10 + (c - 'a'); 1719 continue; 1720 } 1721 if (c >= 'A' && c <= 'F') { 1722 retval <<= 4; 1723 retval += 10 + (c - 'A'); 1724 continue; 1725 } 1726 if (c == ';') { 1727 return retval; 1728 } 1729 fatal("P-050"); 1730 } 1731 } 1732 } 1733 1734 // parameter is a UCS-4 character ... i.e. 
not just 16 bit UNICODE, 1735 // though still subject to the 'Char' construct in XML 1736 private int surrogatesToCharTmp(int ucs4) 1737 throws SAXException { 1738 1739 if (ucs4 <= 0xffff) { 1740 if (XmlChars.isChar(ucs4)) { 1741 charTmp[0] = (char) ucs4; 1742 return 1; 1743 } 1744 } else if (ucs4 <= 0x0010ffff) { 1745 // we represent these as UNICODE surrogate pairs 1746 ucs4 -= 0x10000; 1747 charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff)); 1748 charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff)); 1749 return 2; 1750 } 1751 fatal("P-051", new Object[]{Integer.toHexString(ucs4)}); 1752 // NOTREACHED 1753 return -1; 1754 } 1755 1756 private boolean maybePEReference() 1757 throws IOException, SAXException { 1758 1759 // This is the SYNTACTIC version of this construct. 1760 // When processing external entities, there is also 1761 // a LEXICAL version; see getc() and doLexicalPE. 1762 1763 // [69] PEReference ::= '%' Name ';' 1764 if (!in.peekc('%')) { 1765 return false; 1766 } 1767 1768 String name = maybeGetName(); 1769 Object entity; 1770 1771 if (name == null) { 1772 fatal("P-011"); 1773 } 1774 nextChar(';', "F-021", name); 1775 entity = params.get(name); 1776 1777 if (entity instanceof InternalEntity) { 1778 InternalEntity value = (InternalEntity) entity; 1779 pushReader(value.buf, name, false); 1780 1781 } else if (entity instanceof ExternalEntity) { 1782 pushReader((ExternalEntity) entity); 1783 externalParameterEntity((ExternalEntity) entity); 1784 1785 } else if (entity == null) { 1786 error("V-022", new Object[]{name}); 1787 } 1788 return true; 1789 } 1790 1791 private boolean maybeEntityDecl() 1792 throws IOException, SAXException { 1793 1794 // [70] EntityDecl ::= GEDecl | PEDecl 1795 // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 1796 // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>' 1797 // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
/**
 * Parses a general or parameter entity declaration if one starts at the
 * current input position.
 * <p>
 * Only the first declaration of a name is stored; a later redeclaration
 * is parsed but discarded, with warning P-054 for general entities.
 * Handler events are fired for stored general entities (internal,
 * external, unparsed) only; this method fires no event for parameter
 * entities.
 *
 * @return {@code false} if the input does not start with
 *         {@code <!ENTITY}; {@code true} once a declaration was parsed
 * @throws IOException  propagated from the underlying input
 * @throws SAXException propagated from the handler or raised by fatal()
 */
private boolean maybeEntityDecl()
        throws IOException, SAXException {

    // [70] EntityDecl ::= GEDecl | PEDecl
    // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
    // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
    // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
    // [74] PEDef ::= EntityValue | ExternalID
    //
    InputEntity start = peekDeclaration("!ENTITY");

    if (start == null) {
        return false;
    }

    String entityName;
    // table the entity goes into: "params" for a PE, "entities" otherwise
    SimpleHashtable defns;
    // non-null only when the declaration carries an external ID
    ExternalEntity externalId;
    // false when the name is already declared in "defns"
    boolean doStore;

    // PE expansion gets selectively turned off several places:
    // in ENTITY declarations (here), in comments, in PIs.

    // Here, we allow PE entities to be declared, and allow
    // literals to include PE refs without the added spaces
    // required with their expansion in markup decls.

    doLexicalPE = false;
    whitespace("F-005");
    if (in.peekc('%')) {
        // the '%' marks a parameter entity declaration
        whitespace("F-006");
        defns = params;
    } else {
        defns = entities;
    }

    ungetc();   // leave some whitespace
    doLexicalPE = true;
    entityName = getMarkupDeclname("F-017", false);
    whitespace("F-007");
    externalId = maybeExternalID();

    //
    // first definition sticks ... e.g. internal subset PEs are used
    // to override DTD defaults.  It's also an "error" to incorrectly
    // redefine builtin internal entities, but since reporting such
    // errors is optional we only give warnings ("just in case") for
    // non-parameter entities.
    //
    doStore = (defns.get(entityName) == null);
    if (!doStore && defns == entities) {
        warning("P-054", new Object[]{entityName});
    }

    // internal entities
    if (externalId == null) {
        char value[];
        InternalEntity entity;

        // the literal is always parsed, even when it will be discarded,
        // so that the input position ends up after it
        doLexicalPE = false;    // "ab%bar;cd" -maybe-> "abcd"
        parseLiteral(true);
        doLexicalPE = true;
        if (doStore) {
            // copy the literal out of strTmp
            value = new char[strTmp.length()];
            if (value.length != 0) {
                strTmp.getChars(0, value.length, value, 0);
            }
            entity = new InternalEntity(entityName, value);
            entity.isPE = (defns == params);
            defns.put(entityName, entity);
            if (defns == entities) {
                dtdHandler.internalGeneralEntityDecl(entityName,
                        new String(value));
            }
        }

        // external entities (including unparsed)
    } else {
        // [76] NDataDecl ::= S 'NDATA' S Name
        // NDATA is only looked for on general entities
        if (defns == entities && maybeWhitespace()
                && peek("NDATA")) {
            externalId.notation = getMarkupDeclname("F-018", false);

            // flag undeclared notation for checking after
            // the DTD is fully processed
            // NOTE(review): the placeholder here is Boolean.TRUE, while
            // maybeAttlistDecl() stores the notation name itself as its
            // placeholder -- confirm the later check handles both.
            if (notations.get(externalId.notation) == null) {
                notations.put(externalId.notation, Boolean.TRUE);
            }
        }
        externalId.name = entityName;
        externalId.isPE = (defns == params);
        if (doStore) {
            defns.put(entityName, externalId);
            if (externalId.notation != null) {
                dtdHandler.unparsedEntityDecl(entityName,
                        externalId.publicId, externalId.systemId,
                        externalId.notation);
            } else if (defns == entities) {
                dtdHandler.externalGeneralEntityDecl(entityName,
                        externalId.publicId, externalId.systemId);
            }
        }
    }
    maybeWhitespace();
    nextChar('>', "F-031", entityName);
    // the declaration must end in the input entity it started in
    if (start != in) {
        error("V-013", null);
    }
    return true;
}
'PUBLIC' S' PubidLiteral S Systemliteral 1907 String temp = null; 1908 ExternalEntity retval; 1909 1910 if (peek("PUBLIC")) { 1911 whitespace("F-009"); 1912 temp = parsePublicId(); 1913 } else if (!peek("SYSTEM")) { 1914 return null; 1915 } 1916 1917 retval = new ExternalEntity(in); 1918 retval.publicId = temp; 1919 whitespace("F-008"); 1920 retval.systemId = parseSystemId(); 1921 return retval; 1922 } 1923 1924 private String parseSystemId() 1925 throws IOException, SAXException { 1926 1927 String uri = getQuotedString("F-034", null); 1928 int temp = uri.indexOf(':'); 1929 1930 // resolve relative URIs ... must do it here since 1931 // it's relative to the source file holding the URI! 1932 1933 // "new java.net.URL (URL, string)" conforms to RFC 1630, 1934 // but we can't use that except when the URI is a URL. 1935 // The entity resolver is allowed to handle URIs that are 1936 // not URLs, so we pass URIs through with scheme intact 1937 if (temp == -1 || uri.indexOf('/') < temp) { 1938 String baseURI; 1939 1940 baseURI = in.getSystemId(); 1941 if (baseURI == null) { 1942 fatal("P-055", new Object[]{uri}); 1943 } 1944 if (uri.length() == 0) { 1945 uri = "."; 1946 } 1947 baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1); 1948 if (uri.charAt(0) != '/') { 1949 uri = baseURI + uri; 1950 } else { 1951 // XXX slashes at the beginning of a relative URI are 1952 // a special case we don't handle. 1953 throw new InternalError(); 1954 } 1955 1956 // letting other code map any "/xxx/../" or "/./" to "/", 1957 // since all URIs must handle it the same. 1958 } 1959 // check for fragment ID in URI 1960 if (uri.indexOf('#') != -1) { 1961 error("P-056", new Object[]{uri}); 1962 } 1963 return uri; 1964 } 1965 1966 private void maybeTextDecl() 1967 throws IOException, SAXException { 1968 1969 // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? 
'?>' 1970 if (peek("<?xml")) { 1971 readVersion(false, "1.0"); 1972 readEncoding(true); 1973 maybeWhitespace(); 1974 if (!peek("?>")) { 1975 fatal("P-057"); 1976 } 1977 } 1978 } 1979 1980 private void externalParameterEntity(ExternalEntity next) 1981 throws IOException, SAXException { 1982 1983 // 1984 // Reap the intended benefits of standalone declarations: 1985 // don't deal with external parameter entities, except to 1986 // validate the standalone declaration. 1987 // 1988 1989 // n.b. "in external parameter entities" (and external 1990 // DTD subset, same grammar) parameter references can 1991 // occur "within" markup declarations ... expansions can 1992 // cross syntax rules. Flagged here; affects getc(). 1993 1994 // [79] ExtPE ::= TextDecl? extSubsetDecl 1995 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect 1996 // | PEReference | S )* 1997 InputEntity pe; 1998 1999 // XXX if this returns false ... 2000 2001 pe = in; 2002 maybeTextDecl(); 2003 while (!pe.isEOF()) { 2004 // pop internal PEs (and whitespace before/after) 2005 if (in.isEOF()) { 2006 in = in.pop(); 2007 continue; 2008 } 2009 doLexicalPE = false; 2010 if (maybeWhitespace()) { 2011 continue; 2012 } 2013 if (maybePEReference()) { 2014 continue; 2015 } 2016 doLexicalPE = true; 2017 if (maybeMarkupDecl() || maybeConditionalSect()) { 2018 continue; 2019 } 2020 break; 2021 } 2022 // if (in != pe) throw new InternalError("who popped my PE?"); 2023 if (!pe.isEOF()) { 2024 fatal("P-059", new Object[]{in.getName()}); 2025 } 2026 } 2027 2028 private void readEncoding(boolean must) 2029 throws IOException, SAXException { 2030 2031 // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 2032 String name = maybeReadAttribute("encoding", must); 2033 2034 if (name == null) { 2035 return; 2036 } 2037 for (int i = 0; i < name.length(); i++) { 2038 char c = name.charAt(i); 2039 if ((c >= 'A' && c <= 'Z') 2040 || (c >= 'a' && c <= 'z')) { 2041 continue; 2042 } 2043 if (i != 0 2044 && ((c >= '0' && c <= '9') 
2045 || c == '-' 2046 || c == '_' 2047 || c == '.')) { 2048 continue; 2049 } 2050 fatal("P-060", new Object[]{Character.valueOf(c)}); 2051 } 2052 2053 // 2054 // This should be the encoding in use, and it's even an error for 2055 // it to be anything else (in certain cases that are impractical to 2056 // to test, and may even be insufficient). So, we do the best we 2057 // can, and warn if things look suspicious. Note that Java doesn't 2058 // uniformly expose the encodings, and that the names it uses 2059 // internally are nonstandard. Also, that the XML spec allows 2060 // such "errors" not to be reported at all. 2061 // 2062 String currentEncoding = in.getEncoding(); 2063 2064 if (currentEncoding != null 2065 && !name.equalsIgnoreCase(currentEncoding)) { 2066 warning("P-061", new Object[]{name, currentEncoding}); 2067 } 2068 } 2069 2070 private boolean maybeNotationDecl() 2071 throws IOException, SAXException { 2072 2073 // [82] NotationDecl ::= '<!NOTATION' S Name S 2074 // (ExternalID | PublicID) S? 
'>' 2075 // [83] PublicID ::= 'PUBLIC' S PubidLiteral 2076 InputEntity start = peekDeclaration("!NOTATION"); 2077 2078 if (start == null) { 2079 return false; 2080 } 2081 2082 String name = getMarkupDeclname("F-019", false); 2083 ExternalEntity entity = new ExternalEntity(in); 2084 2085 whitespace("F-011"); 2086 if (peek("PUBLIC")) { 2087 whitespace("F-009"); 2088 entity.publicId = parsePublicId(); 2089 if (maybeWhitespace()) { 2090 if (!peek(">")) { 2091 entity.systemId = parseSystemId(); 2092 } else { 2093 ungetc(); 2094 } 2095 } 2096 } else if (peek("SYSTEM")) { 2097 whitespace("F-008"); 2098 entity.systemId = parseSystemId(); 2099 } else { 2100 fatal("P-062"); 2101 } 2102 maybeWhitespace(); 2103 nextChar('>', "F-032", name); 2104 if (start != in) { 2105 error("V-013", null); 2106 } 2107 if (entity.systemId != null && entity.systemId.indexOf('#') != -1) { 2108 error("P-056", new Object[]{entity.systemId}); 2109 } 2110 2111 Object value = notations.get(name); 2112 if (value != null && value instanceof ExternalEntity) { 2113 warning("P-063", new Object[]{name}); 2114 } else { 2115 notations.put(name, entity); 2116 dtdHandler.notationDecl(name, entity.publicId, 2117 entity.systemId); 2118 } 2119 return true; 2120 } 2121 2122 //////////////////////////////////////////////////////////////// 2123 // 2124 // UTILITIES 2125 // 2126 //////////////////////////////////////////////////////////////// 2127 private char getc() throws IOException, SAXException { 2128 2129 if (!doLexicalPE) { 2130 char c = in.getc(); 2131 return c; 2132 } 2133 2134 // 2135 // External parameter entities get funky processing of '%param;' 2136 // references. It's not clearly defined in the XML spec; but it 2137 // boils down to having those refs be _lexical_ in most cases to 2138 // include partial syntax productions. 
It also needs selective 2139 // enabling; "<!ENTITY % foo ...>" must work, for example, and 2140 // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd" 2141 // if it's expanded in a literal, else "ab cd". PEs also do 2142 // not expand within comments or PIs, and external PEs are only 2143 // allowed to have markup decls (and so aren't handled lexically). 2144 // 2145 // This PE handling should be merged into maybeWhitespace, where 2146 // it can be dealt with more consistently. 2147 // 2148 // Also, there are some validity constraints in this area. 2149 // 2150 char c; 2151 2152 while (in.isEOF()) { 2153 if (in.isInternal() || (doLexicalPE && !in.isDocument())) { 2154 in = in.pop(); 2155 } else { 2156 fatal("P-064", new Object[]{in.getName()}); 2157 } 2158 } 2159 if ((c = in.getc()) == '%' && doLexicalPE) { 2160 // PE ref ::= '%' name ';' 2161 String name = maybeGetName(); 2162 Object entity; 2163 2164 if (name == null) { 2165 fatal("P-011"); 2166 } 2167 nextChar(';', "F-021", name); 2168 entity = params.get(name); 2169 2170 // push a magic "entity" before and after the 2171 // real one, so ungetc() behaves uniformly 2172 pushReader(" ".toCharArray(), null, false); 2173 if (entity instanceof InternalEntity) { 2174 pushReader(((InternalEntity) entity).buf, name, false); 2175 } else if (entity instanceof ExternalEntity) // PEs can't be unparsed! 2176 // XXX if this returns false ... 2177 { 2178 pushReader((ExternalEntity) entity); 2179 } else if (entity == null) // see note in maybePEReference re making this be nonfatal. 
2180 { 2181 fatal("V-022"); 2182 } else { 2183 throw new InternalError(); 2184 } 2185 pushReader(" ".toCharArray(), null, false); 2186 return in.getc(); 2187 } 2188 return c; 2189 } 2190 2191 private void ungetc() { 2192 2193 in.ungetc(); 2194 } 2195 2196 private boolean peek(String s) 2197 throws IOException, SAXException { 2198 2199 return in.peek(s, null); 2200 } 2201 2202 // Return the entity starting the specified declaration 2203 // (for validating declaration nesting) else null. 2204 private InputEntity peekDeclaration(String s) 2205 throws IOException, SAXException { 2206 2207 InputEntity start; 2208 2209 if (!in.peekc('<')) { 2210 return null; 2211 } 2212 start = in; 2213 if (in.peek(s, null)) { 2214 return start; 2215 } 2216 in.ungetc(); 2217 return null; 2218 } 2219 2220 private void nextChar(char c, String location, String near) 2221 throws IOException, SAXException { 2222 2223 while (in.isEOF() && !in.isDocument()) { 2224 in = in.pop(); 2225 } 2226 if (!in.peekc(c)) { 2227 fatal("P-008", new Object[]{Character.valueOf(c), 2228 messages.getMessage(locale, location), 2229 (near == null ? 
"" : ('"' + near + '"'))}); 2230 } 2231 } 2232 2233 private void pushReader(char buf[], String name, boolean isGeneral) 2234 throws SAXException { 2235 2236 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); 2237 r.init(buf, name, in, !isGeneral); 2238 in = r; 2239 } 2240 2241 private boolean pushReader(ExternalEntity next) 2242 throws IOException, SAXException { 2243 2244 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); 2245 InputSource s; 2246 try { 2247 s = next.getInputSource(resolver); 2248 } catch (IOException e) { 2249 String msg = 2250 "unable to open the external entity from :" + next.systemId; 2251 if (next.publicId != null) { 2252 msg += " (public id:" + next.publicId + ")"; 2253 } 2254 2255 SAXParseException spe = new SAXParseException(msg, 2256 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e); 2257 dtdHandler.fatalError(spe); 2258 throw e; 2259 } 2260 2261 r.init(s, next.name, in, next.isPE); 2262 in = r; 2263 return true; 2264 } 2265 2266 public String getPublicId() { 2267 2268 return (in == null) ? null : in.getPublicId(); 2269 } 2270 2271 public String getSystemId() { 2272 2273 return (in == null) ? null : in.getSystemId(); 2274 } 2275 2276 public int getLineNumber() { 2277 2278 return (in == null) ? -1 : in.getLineNumber(); 2279 } 2280 2281 public int getColumnNumber() { 2282 2283 return (in == null) ? 
-1 : in.getColumnNumber(); 2284 } 2285 2286 // error handling convenience routines 2287 private void warning(String messageId, Object parameters[]) 2288 throws SAXException { 2289 2290 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), 2291 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); 2292 2293 dtdHandler.warning(e); 2294 } 2295 2296 void error(String messageId, Object parameters[]) 2297 throws SAXException { 2298 2299 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), 2300 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); 2301 2302 dtdHandler.error(e); 2303 } 2304 2305 private void fatal(String messageId) throws SAXException { 2306 2307 fatal(messageId, null); 2308 } 2309 2310 private void fatal(String messageId, Object parameters[]) 2311 throws SAXException { 2312 2313 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), 2314 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); 2315 2316 dtdHandler.fatalError(e); 2317 2318 throw e; 2319 } 2320 2321 // 2322 // Map char arrays to strings ... cuts down both on memory and 2323 // CPU usage for element/attribute/other names that are reused. 2324 // 2325 // Documents typically repeat names a lot, so we more or less 2326 // intern all the strings within the document; since some strings 2327 // are repeated in multiple documents (e.g. stylesheets) we go 2328 // a bit further, and intern globally. 2329 // 2330 static class NameCache { 2331 // 2332 // Unless we auto-grow this, the default size should be a 2333 // reasonable bit larger than needed for most XML files 2334 // we've yet seen (and be prime). If it's too small, the 2335 // penalty is just excess cache collisions. 
2336 // 2337 2338 NameCacheEntry hashtable[] = new NameCacheEntry[541]; 2339 2340 // 2341 // Usually we just want to get the 'symbol' for these chars 2342 // 2343 String lookup(char value[], int len) { 2344 2345 return lookupEntry(value, len).name; 2346 } 2347 2348 // 2349 // Sometimes we need to scan the chars in the resulting 2350 // string, so there's an accessor which exposes them. 2351 // (Mostly for element end tags.) 2352 // 2353 NameCacheEntry lookupEntry(char value[], int len) { 2354 2355 int index = 0; 2356 NameCacheEntry entry; 2357 2358 // hashing to get index 2359 for (int i = 0; i < len; i++) { 2360 index = index * 31 + value[i]; 2361 } 2362 index &= 0x7fffffff; 2363 index %= hashtable.length; 2364 2365 // return entry if one's there ... 2366 for (entry = hashtable[index]; 2367 entry != null; 2368 entry = entry.next) { 2369 if (entry.matches(value, len)) { 2370 return entry; 2371 } 2372 } 2373 2374 // else create new one 2375 entry = new NameCacheEntry(); 2376 entry.chars = new char[len]; 2377 System.arraycopy(value, 0, entry.chars, 0, len); 2378 entry.name = new String(entry.chars); 2379 // 2380 // NOTE: JDK 1.1 has a fixed size string intern table, 2381 // with non-GC'd entries. It can panic here; that's a 2382 // JDK problem, use 1.2 or later with many identifiers. 2383 // 2384 entry.name = entry.name.intern(); // "global" intern 2385 entry.next = hashtable[index]; 2386 hashtable[index] = entry; 2387 return entry; 2388 } 2389 } 2390 2391 static class NameCacheEntry { 2392 2393 String name; 2394 char chars[]; 2395 NameCacheEntry next; 2396 2397 boolean matches(char value[], int len) { 2398 if (chars == null || chars.length != len) { 2399 return false; 2400 } 2401 for (int i = 0; i < len; i++) { 2402 if (value[i] != chars[i]) { 2403 return false; 2404 } 2405 } 2406 return true; 2407 } 2408 } 2409 2410 // 2411 // Message catalog for diagnostics. 
2412 // 2413 static final Catalog messages = new Catalog(); 2414 2415 static final class Catalog extends MessageCatalog { 2416 2417 Catalog() { 2418 super(DTDParser.class); 2419 } 2420 } 2421 }