1 /* 2 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl ; 22 23 import com.sun.org.apache.xerces.internal.impl.Constants; 24 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 25 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 26 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 27 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 29 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 30 import com.sun.org.apache.xerces.internal.util.*; 31 import com.sun.org.apache.xerces.internal.util.URI; 32 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 33 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 34 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 36 import com.sun.org.apache.xerces.internal.xni.Augmentations; 37 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 38 import com.sun.org.apache.xerces.internal.xni.XNIException; 39 import com.sun.org.apache.xerces.internal.xni.parser.*; 40 import com.sun.xml.internal.stream.Entity; 41 import com.sun.xml.internal.stream.StaxEntityResolverWrapper; 42 import com.sun.xml.internal.stream.StaxXMLInputSource; 43 import com.sun.xml.internal.stream.XMLEntityStorage; 44 import java.io.*; 45 import java.lang.reflect.Method; 46 import java.net.HttpURLConnection; 47 import java.net.URISyntaxException; 48 import java.net.URL; 49 import java.net.URLConnection; 50 import java.util.HashMap; 51 import java.util.Iterator; 52 import java.util.Locale; 53 import java.util.Map; 54 import java.util.Stack; 55 import java.util.StringTokenizer; 56 import javax.xml.stream.XMLInputFactory; 57 58 59 /** 60 * Will keep track of current entity. 61 * 62 * The entity manager handles the registration of general and parameter 63 * entities; resolves entities; and starts entities. The entity manager 64 * is a central component in a standard parser configuration and this 65 * class works directly with the entity scanner to manage the underlying 66 * xni. 67 * <p> 68 * This component requires the following features and properties from the 69 * component manager that uses it: 70 * <ul> 71 * <li>http://xml.org/sax/features/validation</li> 72 * <li>http://xml.org/sax/features/external-general-entities</li> 73 * <li>http://xml.org/sax/features/external-parameter-entities</li> 74 * <li>http://apache.org/xml/features/allow-java-encodings</li> 75 * <li>http://apache.org/xml/properties/internal/symbol-table</li> 76 * <li>http://apache.org/xml/properties/internal/error-reporter</li> 77 * <li>http://apache.org/xml/properties/internal/entity-resolver</li> 78 * </ul> 79 * 80 * 81 * @author Andy Clark, IBM 82 * @author Arnaud Le Hors, IBM 83 * @author K.Venugopal SUN Microsystems 84 * @author Neeraj Bajaj SUN Microsystems 85 * @author Sunitha Reddy SUN Microsystems 86 */ 87 public class XMLEntityManager implements XMLComponent, XMLEntityResolver { 88 89 // 90 // Constants 91 // 92 93 /** Default buffer size (2048). */ 94 public static final int DEFAULT_BUFFER_SIZE = 8192; 95 96 /** Default buffer size before we've finished with the XMLDecl: */ 97 public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64; 98 99 /** Default internal entity buffer size (1024). */ 100 public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 1024; 101 102 // feature identifiers 103 104 /** Feature identifier: validation. */ 105 protected static final String VALIDATION = 106 Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE; 107 108 /** 109 * standard uri conformant (strict uri). 110 * http://apache.org/xml/features/standard-uri-conformant 111 */ 112 protected boolean fStrictURI; 113 114 115 /** Feature identifier: external general entities. */ 116 protected static final String EXTERNAL_GENERAL_ENTITIES = 117 Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE; 118 119 /** Feature identifier: external parameter entities. */ 120 protected static final String EXTERNAL_PARAMETER_ENTITIES = 121 Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE; 122 123 /** Feature identifier: allow Java encodings. */ 124 protected static final String ALLOW_JAVA_ENCODINGS = 125 Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; 126 127 /** Feature identifier: warn on duplicate EntityDef */ 128 protected static final String WARN_ON_DUPLICATE_ENTITYDEF = 129 Constants.XERCES_FEATURE_PREFIX +Constants.WARN_ON_DUPLICATE_ENTITYDEF_FEATURE; 130 131 /** Feature identifier: load external DTD. */ 132 protected static final String LOAD_EXTERNAL_DTD = 133 Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE; 134 135 // property identifiers 136 137 /** Property identifier: symbol table. */ 138 protected static final String SYMBOL_TABLE = 139 Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY; 140 141 /** Property identifier: error reporter. */ 142 protected static final String ERROR_REPORTER = 143 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; 144 145 /** Feature identifier: standard uri conformant */ 146 protected static final String STANDARD_URI_CONFORMANT = 147 Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE; 148 149 /** Property identifier: entity resolver. */ 150 protected static final String ENTITY_RESOLVER = 151 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; 152 153 protected static final String STAX_ENTITY_RESOLVER = 154 Constants.XERCES_PROPERTY_PREFIX + Constants.STAX_ENTITY_RESOLVER_PROPERTY; 155 156 // property identifier: ValidationManager 157 protected static final String VALIDATION_MANAGER = 158 Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY; 159 160 /** property identifier: buffer size. */ 161 protected static final String BUFFER_SIZE = 162 Constants.XERCES_PROPERTY_PREFIX + Constants.BUFFER_SIZE_PROPERTY; 163 164 /** property identifier: security manager. */ 165 protected static final String SECURITY_MANAGER = 166 Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY; 167 168 protected static final String PARSER_SETTINGS = 169 Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS; 170 171 /** Property identifier: Security property manager. */ 172 private static final String XML_SECURITY_PROPERTY_MANAGER = 173 Constants.XML_SECURITY_PROPERTY_MANAGER; 174 175 /** access external dtd: file protocol */ 176 static final String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT; 177 178 // recognized features and properties 179 180 /** Recognized features. */ 181 private static final String[] RECOGNIZED_FEATURES = { 182 VALIDATION, 183 EXTERNAL_GENERAL_ENTITIES, 184 EXTERNAL_PARAMETER_ENTITIES, 185 ALLOW_JAVA_ENCODINGS, 186 WARN_ON_DUPLICATE_ENTITYDEF, 187 STANDARD_URI_CONFORMANT 188 }; 189 190 /** Feature defaults. */ 191 private static final Boolean[] FEATURE_DEFAULTS = { 192 null, 193 Boolean.TRUE, 194 Boolean.TRUE, 195 Boolean.TRUE, 196 Boolean.FALSE, 197 Boolean.FALSE 198 }; 199 200 /** Recognized properties. */ 201 private static final String[] RECOGNIZED_PROPERTIES = { 202 SYMBOL_TABLE, 203 ERROR_REPORTER, 204 ENTITY_RESOLVER, 205 VALIDATION_MANAGER, 206 BUFFER_SIZE, 207 SECURITY_MANAGER, 208 XML_SECURITY_PROPERTY_MANAGER 209 }; 210 211 /** Property defaults. */ 212 private static final Object[] PROPERTY_DEFAULTS = { 213 null, 214 null, 215 null, 216 null, 217 new Integer(DEFAULT_BUFFER_SIZE), 218 null, 219 null 220 }; 221 222 private static final String XMLEntity = "[xml]".intern(); 223 private static final String DTDEntity = "[dtd]".intern(); 224 225 // debugging 226 227 /** 228 * Debug printing of buffer. This debugging flag works best when you 229 * resize the DEFAULT_BUFFER_SIZE down to something reasonable like 230 * 64 characters. 231 */ 232 private static final boolean DEBUG_BUFFER = false; 233 234 /** warn on duplicate Entity declaration. 235 * http://apache.org/xml/features/warn-on-duplicate-entitydef 236 */ 237 protected boolean fWarnDuplicateEntityDef; 238 239 /** Debug some basic entities. */ 240 private static final boolean DEBUG_ENTITIES = false; 241 242 /** Debug switching readers for encodings. */ 243 private static final boolean DEBUG_ENCODINGS = false; 244 245 // should be diplayed trace resolving messages 246 private static final boolean DEBUG_RESOLVER = false ; 247 248 // 249 // Data 250 // 251 252 // features 253 254 /** 255 * Validation. This feature identifier is: 256 * http://xml.org/sax/features/validation 257 */ 258 protected boolean fValidation; 259 260 /** 261 * External general entities. This feature identifier is: 262 * http://xml.org/sax/features/external-general-entities 263 */ 264 protected boolean fExternalGeneralEntities; 265 266 /** 267 * External parameter entities. This feature identifier is: 268 * http://xml.org/sax/features/external-parameter-entities 269 */ 270 protected boolean fExternalParameterEntities; 271 272 /** 273 * Allow Java encoding names. This feature identifier is: 274 * http://apache.org/xml/features/allow-java-encodings 275 */ 276 protected boolean fAllowJavaEncodings = true ; 277 278 /** Load external DTD. */ 279 protected boolean fLoadExternalDTD = true; 280 281 // properties 282 283 /** 284 * Symbol table. This property identifier is: 285 * http://apache.org/xml/properties/internal/symbol-table 286 */ 287 protected SymbolTable fSymbolTable; 288 289 /** 290 * Error reporter. This property identifier is: 291 * http://apache.org/xml/properties/internal/error-reporter 292 */ 293 protected XMLErrorReporter fErrorReporter; 294 295 /** 296 * Entity resolver. This property identifier is: 297 * http://apache.org/xml/properties/internal/entity-resolver 298 */ 299 protected XMLEntityResolver fEntityResolver; 300 301 /** Stax Entity Resolver. This property identifier is XMLInputFactory.ENTITY_RESOLVER */ 302 303 protected StaxEntityResolverWrapper fStaxEntityResolver; 304 305 /** Property Manager. This is used from Stax */ 306 protected PropertyManager fPropertyManager ; 307 308 /** StAX properties */ 309 boolean fSupportDTD = true; 310 boolean fReplaceEntityReferences = true; 311 boolean fSupportExternalEntities = true; 312 313 /** used to restrict external access */ 314 protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT; 315 316 // settings 317 318 /** 319 * Validation manager. This property identifier is: 320 * http://apache.org/xml/properties/internal/validation-manager 321 */ 322 protected ValidationManager fValidationManager; 323 324 // settings 325 326 /** 327 * Buffer size. We get this value from a property. The default size 328 * is used if the input buffer size property is not specified. 329 * REVISIT: do we need a property for internal entity buffer size? 330 */ 331 protected int fBufferSize = DEFAULT_BUFFER_SIZE; 332 333 /** Security Manager */ 334 protected XMLSecurityManager fSecurityManager = null; 335 336 protected XMLLimitAnalyzer fLimitAnalyzer = null; 337 338 protected int entityExpansionIndex; 339 340 /** 341 * True if the document entity is standalone. This should really 342 * only be set by the document source (e.g. XMLDocumentScanner). 343 */ 344 protected boolean fStandalone; 345 346 // are the entities being parsed in the external subset? 347 // NOTE: this *is not* the same as whether they're external entities! 348 protected boolean fInExternalSubset = false; 349 350 351 // handlers 352 /** Entity handler. */ 353 protected XMLEntityHandler fEntityHandler; 354 355 /** Current entity scanner */ 356 protected XMLEntityScanner fEntityScanner ; 357 358 /** XML 1.0 entity scanner. */ 359 protected XMLEntityScanner fXML10EntityScanner; 360 361 /** XML 1.1 entity scanner. */ 362 protected XMLEntityScanner fXML11EntityScanner; 363 364 /** count of entities expanded: */ 365 protected int fEntityExpansionCount = 0; 366 367 // entities 368 369 /** Entities. */ 370 protected Map<String, Entity> fEntities = new HashMap<>(); 371 372 /** Entity stack. */ 373 protected Stack fEntityStack = new Stack(); 374 375 /** Current entity. */ 376 protected Entity.ScannedEntity fCurrentEntity = null; 377 378 /** identify if the InputSource is created by a resolver */ 379 boolean fISCreatedByResolver = false; 380 381 // shared context 382 383 protected XMLEntityStorage fEntityStorage ; 384 385 protected final Object [] defaultEncoding = new Object[]{"UTF-8", null}; 386 387 388 // temp vars 389 390 /** Resource identifer. */ 391 private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl(); 392 393 /** Augmentations for entities. */ 394 private final Augmentations fEntityAugs = new AugmentationsImpl(); 395 396 /** Pool of character buffers. */ 397 private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE); 398 399 // 400 // Constructors 401 // 402 403 /** 404 * If this constructor is used to create the object, reset() should be invoked on this object 405 */ 406 public XMLEntityManager() { 407 //for entity managers not created by parsers 408 fSecurityManager = new XMLSecurityManager(true); 409 fEntityStorage = new XMLEntityStorage(this) ; 410 setScannerVersion(Constants.XML_VERSION_1_0); 411 } // <init>() 412 413 /** Default constructor. */ 414 public XMLEntityManager(PropertyManager propertyManager) { 415 fPropertyManager = propertyManager ; 416 //pass a reference to current entity being scanned 417 //fEntityStorage = new XMLEntityStorage(fCurrentEntity) ; 418 fEntityStorage = new XMLEntityStorage(this) ; 419 fEntityScanner = new XMLEntityScanner(propertyManager, this) ; 420 reset(propertyManager); 421 } // <init>() 422 423 /** 424 * Adds an internal entity declaration. 425 * <p> 426 * <strong>Note:</strong> This method ignores subsequent entity 427 * declarations. 428 * <p> 429 * <strong>Note:</strong> The name should be a unique symbol. The 430 * SymbolTable can be used for this purpose. 431 * 432 * @param name The name of the entity. 433 * @param text The text of the entity. 434 * 435 * @see SymbolTable 436 */ 437 public void addInternalEntity(String name, String text) { 438 if (!fEntities.containsKey(name)) { 439 Entity entity = new Entity.InternalEntity(name, text, fInExternalSubset); 440 fEntities.put(name, entity); 441 } else{ 442 if(fWarnDuplicateEntityDef){ 443 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 444 "MSG_DUPLICATE_ENTITY_DEFINITION", 445 new Object[]{ name }, 446 XMLErrorReporter.SEVERITY_WARNING ); 447 } 448 } 449 450 } // addInternalEntity(String,String) 451 452 /** 453 * Adds an external entity declaration. 454 * <p> 455 * <strong>Note:</strong> This method ignores subsequent entity 456 * declarations. 457 * <p> 458 * <strong>Note:</strong> The name should be a unique symbol. The 459 * SymbolTable can be used for this purpose. 460 * 461 * @param name The name of the entity. 462 * @param publicId The public identifier of the entity. 463 * @param literalSystemId The system identifier of the entity. 464 * @param baseSystemId The base system identifier of the entity. 465 * This is the system identifier of the entity 466 * where <em>the entity being added</em> and 467 * is used to expand the system identifier when 468 * the system identifier is a relative URI. 469 * When null the system identifier of the first 470 * external entity on the stack is used instead. 471 * 472 * @see SymbolTable 473 */ 474 public void addExternalEntity(String name, 475 String publicId, String literalSystemId, 476 String baseSystemId) throws IOException { 477 if (!fEntities.containsKey(name)) { 478 if (baseSystemId == null) { 479 // search for the first external entity on the stack 480 int size = fEntityStack.size(); 481 if (size == 0 && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 482 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 483 } 484 for (int i = size - 1; i >= 0 ; i--) { 485 Entity.ScannedEntity externalEntity = 486 (Entity.ScannedEntity)fEntityStack.elementAt(i); 487 if (externalEntity.entityLocation != null && externalEntity.entityLocation.getExpandedSystemId() != null) { 488 baseSystemId = externalEntity.entityLocation.getExpandedSystemId(); 489 break; 490 } 491 } 492 } 493 Entity entity = new Entity.ExternalEntity(name, 494 new XMLEntityDescriptionImpl(name, publicId, literalSystemId, baseSystemId, 495 expandSystemId(literalSystemId, baseSystemId, false)), null, fInExternalSubset); 496 fEntities.put(name, entity); 497 } else{ 498 if(fWarnDuplicateEntityDef){ 499 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 500 "MSG_DUPLICATE_ENTITY_DEFINITION", 501 new Object[]{ name }, 502 XMLErrorReporter.SEVERITY_WARNING ); 503 } 504 } 505 506 } // addExternalEntity(String,String,String,String) 507 508 509 /** 510 * Adds an unparsed entity declaration. 511 * <p> 512 * <strong>Note:</strong> This method ignores subsequent entity 513 * declarations. 514 * <p> 515 * <strong>Note:</strong> The name should be a unique symbol. The 516 * SymbolTable can be used for this purpose. 517 * 518 * @param name The name of the entity. 519 * @param publicId The public identifier of the entity. 520 * @param systemId The system identifier of the entity. 521 * @param notation The name of the notation. 522 * 523 * @see SymbolTable 524 */ 525 public void addUnparsedEntity(String name, 526 String publicId, String systemId, 527 String baseSystemId, String notation) { 528 if (!fEntities.containsKey(name)) { 529 Entity.ExternalEntity entity = new Entity.ExternalEntity(name, 530 new XMLEntityDescriptionImpl(name, publicId, systemId, baseSystemId, null), 531 notation, fInExternalSubset); 532 fEntities.put(name, entity); 533 } else{ 534 if(fWarnDuplicateEntityDef){ 535 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 536 "MSG_DUPLICATE_ENTITY_DEFINITION", 537 new Object[]{ name }, 538 XMLErrorReporter.SEVERITY_WARNING ); 539 } 540 } 541 } // addUnparsedEntity(String,String,String,String) 542 543 544 /** get the entity storage object from entity manager */ 545 public XMLEntityStorage getEntityStore(){ 546 return fEntityStorage ; 547 } 548 549 /** return the entity responsible for reading the entity */ 550 public XMLEntityScanner getEntityScanner(){ 551 if(fEntityScanner == null) { 552 // default to 1.0 553 if(fXML10EntityScanner == null) { 554 fXML10EntityScanner = new XMLEntityScanner(); 555 } 556 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 557 fEntityScanner = fXML10EntityScanner; 558 } 559 return fEntityScanner; 560 561 } 562 563 public void setScannerVersion(short version) { 564 565 if(version == Constants.XML_VERSION_1_0) { 566 if(fXML10EntityScanner == null) { 567 fXML10EntityScanner = new XMLEntityScanner(); 568 } 569 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 570 fEntityScanner = fXML10EntityScanner; 571 fEntityScanner.setCurrentEntity(fCurrentEntity); 572 } else { 573 if(fXML11EntityScanner == null) { 574 fXML11EntityScanner = new XML11EntityScanner(); 575 } 576 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 577 fEntityScanner = fXML11EntityScanner; 578 fEntityScanner.setCurrentEntity(fCurrentEntity); 579 } 580 581 } 582 583 /** 584 * This method uses the passed-in XMLInputSource to make 585 * fCurrentEntity usable for reading. 586 * 587 * @param reference flag to indicate whether the entity is an Entity Reference. 588 * @param name name of the entity (XML is it's the document entity) 589 * @param xmlInputSource the input source, with sufficient information 590 * to begin scanning characters. 591 * @param literal True if this entity is started within a 592 * literal value. 593 * @param isExternal whether this entity should be treated as an internal or external entity. 594 * @throws IOException if anything can't be read 595 * XNIException If any parser-specific goes wrong. 596 * @return the encoding of the new entity or null if a character stream was employed 597 */ 598 public String setupCurrentEntity(boolean reference, String name, XMLInputSource xmlInputSource, 599 boolean literal, boolean isExternal) 600 throws IOException, XNIException { 601 // get information 602 603 final String publicId = xmlInputSource.getPublicId(); 604 String literalSystemId = xmlInputSource.getSystemId(); 605 String baseSystemId = xmlInputSource.getBaseSystemId(); 606 String encoding = xmlInputSource.getEncoding(); 607 final boolean encodingExternallySpecified = (encoding != null); 608 Boolean isBigEndian = null; 609 610 // create reader 611 InputStream stream = null; 612 Reader reader = xmlInputSource.getCharacterStream(); 613 614 // First chance checking strict URI 615 String expandedSystemId = expandSystemId(literalSystemId, baseSystemId, fStrictURI); 616 if (baseSystemId == null) { 617 baseSystemId = expandedSystemId; 618 } 619 if (reader == null) { 620 stream = xmlInputSource.getByteStream(); 621 if (stream == null) { 622 URL location = new URL(expandedSystemId); 623 URLConnection connect = location.openConnection(); 624 if (!(connect instanceof HttpURLConnection)) { 625 stream = connect.getInputStream(); 626 } 627 else { 628 boolean followRedirects = true; 629 630 // setup URLConnection if we have an HTTPInputSource 631 if (xmlInputSource instanceof HTTPInputSource) { 632 final HttpURLConnection urlConnection = (HttpURLConnection) connect; 633 final HTTPInputSource httpInputSource = (HTTPInputSource) xmlInputSource; 634 635 // set request properties 636 Iterator propIter = httpInputSource.getHTTPRequestProperties(); 637 while (propIter.hasNext()) { 638 Map.Entry entry = (Map.Entry) propIter.next(); 639 urlConnection.setRequestProperty((String) entry.getKey(), (String) entry.getValue()); 640 } 641 642 // set preference for redirection 643 followRedirects = httpInputSource.getFollowHTTPRedirects(); 644 if (!followRedirects) { 645 urlConnection.setInstanceFollowRedirects(followRedirects); 646 } 647 } 648 649 stream = connect.getInputStream(); 650 651 // REVISIT: If the URLConnection has external encoding 652 // information, we should be reading it here. It's located 653 // in the charset parameter of Content-Type. -- mrglavas 654 655 if (followRedirects) { 656 String redirect = connect.getURL().toString(); 657 // E43: Check if the URL was redirected, and then 658 // update literal and expanded system IDs if needed. 659 if (!redirect.equals(expandedSystemId)) { 660 literalSystemId = redirect; 661 expandedSystemId = redirect; 662 } 663 } 664 } 665 } 666 667 // wrap this stream in RewindableInputStream 668 stream = new RewindableInputStream(stream); 669 670 // perform auto-detect of encoding if necessary 671 if (encoding == null) { 672 // read first four bytes and determine encoding 673 final byte[] b4 = new byte[4]; 674 int count = 0; 675 for (; count<4; count++ ) { 676 b4[count] = (byte)stream.read(); 677 } 678 if (count == 4) { 679 Object [] encodingDesc = getEncodingName(b4, count); 680 encoding = (String)(encodingDesc[0]); 681 isBigEndian = (Boolean)(encodingDesc[1]); 682 683 stream.reset(); 684 // Special case UTF-8 files with BOM created by Microsoft 685 // tools. It's more efficient to consume the BOM than make 686 // the reader perform extra checks. -Ac 687 if (count > 2 && encoding.equals("UTF-8")) { 688 int b0 = b4[0] & 0xFF; 689 int b1 = b4[1] & 0xFF; 690 int b2 = b4[2] & 0xFF; 691 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 692 // ignore first three bytes... 693 stream.skip(3); 694 } 695 } 696 reader = createReader(stream, encoding, isBigEndian); 697 } else { 698 reader = createReader(stream, encoding, isBigEndian); 699 } 700 } 701 702 // use specified encoding 703 else { 704 encoding = encoding.toUpperCase(Locale.ENGLISH); 705 706 // If encoding is UTF-8, consume BOM if one is present. 707 if (encoding.equals("UTF-8")) { 708 final int[] b3 = new int[3]; 709 int count = 0; 710 for (; count < 3; ++count) { 711 b3[count] = stream.read(); 712 if (b3[count] == -1) 713 break; 714 } 715 if (count == 3) { 716 if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) { 717 // First three bytes are not BOM, so reset. 718 stream.reset(); 719 } 720 } else { 721 stream.reset(); 722 } 723 } 724 // If encoding is UTF-16, we still need to read the first four bytes 725 // in order to discover the byte order. 726 else if (encoding.equals("UTF-16")) { 727 final int[] b4 = new int[4]; 728 int count = 0; 729 for (; count < 4; ++count) { 730 b4[count] = stream.read(); 731 if (b4[count] == -1) 732 break; 733 } 734 stream.reset(); 735 736 String utf16Encoding = "UTF-16"; 737 if (count >= 2) { 738 final int b0 = b4[0]; 739 final int b1 = b4[1]; 740 if (b0 == 0xFE && b1 == 0xFF) { 741 // UTF-16, big-endian 742 utf16Encoding = "UTF-16BE"; 743 isBigEndian = Boolean.TRUE; 744 } 745 else if (b0 == 0xFF && b1 == 0xFE) { 746 // UTF-16, little-endian 747 utf16Encoding = "UTF-16LE"; 748 isBigEndian = Boolean.FALSE; 749 } 750 else if (count == 4) { 751 final int b2 = b4[2]; 752 final int b3 = b4[3]; 753 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 754 // UTF-16, big-endian, no BOM 755 utf16Encoding = "UTF-16BE"; 756 isBigEndian = Boolean.TRUE; 757 } 758 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 759 // UTF-16, little-endian, no BOM 760 utf16Encoding = "UTF-16LE"; 761 isBigEndian = Boolean.FALSE; 762 } 763 } 764 } 765 reader = createReader(stream, utf16Encoding, isBigEndian); 766 } 767 // If encoding is UCS-4, we still need to read the first four bytes 768 // in order to discover the byte order. 769 else if (encoding.equals("ISO-10646-UCS-4")) { 770 final int[] b4 = new int[4]; 771 int count = 0; 772 for (; count < 4; ++count) { 773 b4[count] = stream.read(); 774 if (b4[count] == -1) 775 break; 776 } 777 stream.reset(); 778 779 // Ignore unusual octet order for now. 780 if (count == 4) { 781 // UCS-4, big endian (1234) 782 if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) { 783 isBigEndian = Boolean.TRUE; 784 } 785 // UCS-4, little endian (1234) 786 else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) { 787 isBigEndian = Boolean.FALSE; 788 } 789 } 790 } 791 // If encoding is UCS-2, we still need to read the first four bytes 792 // in order to discover the byte order. 793 else if (encoding.equals("ISO-10646-UCS-2")) { 794 final int[] b4 = new int[4]; 795 int count = 0; 796 for (; count < 4; ++count) { 797 b4[count] = stream.read(); 798 if (b4[count] == -1) 799 break; 800 } 801 stream.reset(); 802 803 if (count == 4) { 804 // UCS-2, big endian 805 if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) { 806 isBigEndian = Boolean.TRUE; 807 } 808 // UCS-2, little endian 809 else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) { 810 isBigEndian = Boolean.FALSE; 811 } 812 } 813 } 814 815 reader = createReader(stream, encoding, isBigEndian); 816 } 817 818 // read one character at a time so we don't jump too far 819 // ahead, converting characters from the byte stream in 820 // the wrong encoding 821 if (DEBUG_ENCODINGS) { 822 System.out.println("$$$ no longer wrapping reader in OneCharReader"); 823 } 824 //reader = new OneCharReader(reader); 825 } 826 827 // We've seen a new Reader. 828 // Push it on the stack so we can close it later. 829 //fOwnReaders.add(reader); 830 831 // push entity on stack 832 if (fCurrentEntity != null) { 833 fEntityStack.push(fCurrentEntity); 834 } 835 836 // create entity 837 /* if encoding is specified externally, 'encoding' information present 838 * in the prolog of the XML document is not considered. Hence, prolog can 839 * be read in Chunks of data instead of byte by byte. 840 */ 841 fCurrentEntity = new Entity.ScannedEntity(reference, name, 842 new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandedSystemId), 843 stream, reader, encoding, literal, encodingExternallySpecified, isExternal); 844 fCurrentEntity.setEncodingExternallySpecified(encodingExternallySpecified); 845 fEntityScanner.setCurrentEntity(fCurrentEntity); 846 fResourceIdentifier.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); 847 if (fLimitAnalyzer != null) { 848 fLimitAnalyzer.startEntity(name); 849 } 850 return encoding; 851 } //setupCurrentEntity(String, XMLInputSource, boolean, boolean): String 852 853 854 /** 855 * Checks whether an entity given by name is external. 856 * 857 * @param entityName The name of the entity to check. 858 * @return True if the entity is external, false otherwise 859 * (including when the entity is not declared). 860 */ 861 public boolean isExternalEntity(String entityName) { 862 863 Entity entity = fEntities.get(entityName); 864 if (entity == null) { 865 return false; 866 } 867 return entity.isExternal(); 868 } 869 870 /** 871 * Checks whether the declaration of an entity given by name is 872 * // in the external subset. 873 * 874 * @param entityName The name of the entity to check. 875 * @return True if the entity was declared in the external subset, false otherwise 876 * (including when the entity is not declared). 877 */ 878 public boolean isEntityDeclInExternalSubset(String entityName) { 879 880 Entity entity = fEntities.get(entityName); 881 if (entity == null) { 882 return false; 883 } 884 return entity.isEntityDeclInExternalSubset(); 885 } 886 887 888 889 // 890 // Public methods 891 // 892 893 /** 894 * Sets whether the document entity is standalone. 895 * 896 * @param standalone True if document entity is standalone. 897 */ 898 public void setStandalone(boolean standalone) { 899 fStandalone = standalone; 900 } 901 // setStandalone(boolean) 902 903 /** Returns true if the document entity is standalone. */ 904 public boolean isStandalone() { 905 return fStandalone; 906 } //isStandalone():boolean 907 908 public boolean isDeclaredEntity(String entityName) { 909 910 Entity entity = fEntities.get(entityName); 911 return entity != null; 912 } 913 914 public boolean isUnparsedEntity(String entityName) { 915 916 Entity entity = fEntities.get(entityName); 917 if (entity == null) { 918 return false; 919 } 920 return entity.isUnparsed(); 921 } 922 923 924 925 // this simply returns the fResourceIdentifier object; 926 // this should only be used with caution by callers that 927 // carefully manage the entity manager's behaviour, so that 928 // this doesn't returning meaningless or misleading data. 929 // @return a reference to the current fResourceIdentifier object 930 public XMLResourceIdentifier getCurrentResourceIdentifier() { 931 return fResourceIdentifier; 932 } 933 934 /** 935 * Sets the entity handler. When an entity starts and ends, the 936 * entity handler is notified of the change. 937 * 938 * @param entityHandler The new entity handler. 939 */ 940 941 public void setEntityHandler(com.sun.org.apache.xerces.internal.impl.XMLEntityHandler entityHandler) { 942 fEntityHandler = (XMLEntityHandler) entityHandler; 943 } // setEntityHandler(XMLEntityHandler) 944 945 //this function returns StaxXMLInputSource 946 public StaxXMLInputSource resolveEntityAsPerStax(XMLResourceIdentifier resourceIdentifier) throws java.io.IOException{ 947 948 if(resourceIdentifier == null ) return null; 949 950 String publicId = resourceIdentifier.getPublicId(); 951 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 952 String baseSystemId = resourceIdentifier.getBaseSystemId(); 953 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 954 // if no base systemId given, assume that it's relative 955 // to the systemId of the current scanned entity 956 // Sometimes the system id is not (properly) expanded. 957 // We need to expand the system id if: 958 // a. the expanded one was null; or 959 // b. the base system id was null, but becomes non-null from the current entity. 960 boolean needExpand = (expandedSystemId == null); 961 // REVISIT: why would the baseSystemId ever be null? if we 962 // didn't have to make this check we wouldn't have to reuse the 963 // fXMLResourceIdentifier object... 964 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 965 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 966 if (baseSystemId != null) 967 needExpand = true; 968 } 969 if (needExpand) 970 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 971 972 // give the entity resolver a chance 973 StaxXMLInputSource staxInputSource = null; 974 XMLInputSource xmlInputSource = null; 975 976 XMLResourceIdentifierImpl ri = null; 977 978 if (resourceIdentifier instanceof XMLResourceIdentifierImpl) { 979 ri = (XMLResourceIdentifierImpl)resourceIdentifier; 980 } else { 981 fResourceIdentifier.clear(); 982 ri = fResourceIdentifier; 983 } 984 ri.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); 985 if(DEBUG_RESOLVER){ 986 System.out.println("BEFORE Calling resolveEntity") ; 987 } 988 989 fISCreatedByResolver = false; 990 //either of Stax or Xerces would be null 991 if(fStaxEntityResolver != null){ 992 staxInputSource = fStaxEntityResolver.resolveEntity(ri); 993 if(staxInputSource != null) { 994 fISCreatedByResolver = true; 995 } 996 } 997 998 if(fEntityResolver != null){ 999 xmlInputSource = fEntityResolver.resolveEntity(ri); 1000 if(xmlInputSource != null) { 1001 fISCreatedByResolver = true; 1002 } 1003 } 1004 1005 if(xmlInputSource != null){ 1006 //wrap this XMLInputSource to StaxInputSource 1007 staxInputSource = new StaxXMLInputSource(xmlInputSource, fISCreatedByResolver); 1008 } 1009 1010 // do default resolution 1011 //this works for both stax & Xerces, if staxInputSource is null, it means parser need to revert to default resolution 1012 if (staxInputSource == null) { 1013 // REVISIT: when systemId is null, I think we should return null. 1014 // is this the right solution? -SG 1015 //if (systemId != null) 1016 staxInputSource = new StaxXMLInputSource(new XMLInputSource(publicId, literalSystemId, baseSystemId)); 1017 }else if(staxInputSource.hasXMLStreamOrXMLEventReader()){ 1018 //Waiting for the clarification from EG. - nb 1019 } 1020 1021 if (DEBUG_RESOLVER) { 1022 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1023 System.err.println(" = " + xmlInputSource); 1024 } 1025 1026 return staxInputSource; 1027 1028 } 1029 1030 /** 1031 * Resolves the specified public and system identifiers. This 1032 * method first attempts to resolve the entity based on the 1033 * EntityResolver registered by the application. If no entity 1034 * resolver is registered or if the registered entity handler 1035 * is unable to resolve the entity, then default entity 1036 * resolution will occur. 1037 * 1038 * @param publicId The public identifier of the entity. 1039 * @param systemId The system identifier of the entity. 1040 * @param baseSystemId The base system identifier of the entity. 1041 * This is the system identifier of the current 1042 * entity and is used to expand the system 1043 * identifier when the system identifier is a 1044 * relative URI. 1045 * 1046 * @return Returns an input source that wraps the resolved entity. 1047 * This method will never return null. 1048 * 1049 * @throws IOException Thrown on i/o error. 1050 * @throws XNIException Thrown by entity resolver to signal an error. 1051 */ 1052 public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier) throws IOException, XNIException { 1053 if(resourceIdentifier == null ) return null; 1054 String publicId = resourceIdentifier.getPublicId(); 1055 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 1056 String baseSystemId = resourceIdentifier.getBaseSystemId(); 1057 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 1058 String namespace = resourceIdentifier.getNamespace(); 1059 1060 // if no base systemId given, assume that it's relative 1061 // to the systemId of the current scanned entity 1062 // Sometimes the system id is not (properly) expanded. 1063 // We need to expand the system id if: 1064 // a. the expanded one was null; or 1065 // b. the base system id was null, but becomes non-null from the current entity. 1066 boolean needExpand = (expandedSystemId == null); 1067 // REVISIT: why would the baseSystemId ever be null? if we 1068 // didn't have to make this check we wouldn't have to reuse the 1069 // fXMLResourceIdentifier object... 1070 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 1071 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 1072 if (baseSystemId != null) 1073 needExpand = true; 1074 } 1075 if (needExpand) 1076 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 1077 1078 // give the entity resolver a chance 1079 XMLInputSource xmlInputSource = null; 1080 1081 if (fEntityResolver != null) { 1082 resourceIdentifier.setBaseSystemId(baseSystemId); 1083 resourceIdentifier.setExpandedSystemId(expandedSystemId); 1084 xmlInputSource = fEntityResolver.resolveEntity(resourceIdentifier); 1085 } 1086 1087 // do default resolution 1088 // REVISIT: what's the correct behavior if the user provided an entity 1089 // resolver (fEntityResolver != null), but resolveEntity doesn't return 1090 // an input source (xmlInputSource == null)? 1091 // do we do default resolution, or do we just return null? -SG 1092 if (xmlInputSource == null) { 1093 // REVISIT: when systemId is null, I think we should return null. 1094 // is this the right solution? -SG 1095 //if (systemId != null) 1096 xmlInputSource = new XMLInputSource(publicId, literalSystemId, baseSystemId); 1097 } 1098 1099 if (DEBUG_RESOLVER) { 1100 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1101 System.err.println(" = " + xmlInputSource); 1102 } 1103 1104 return xmlInputSource; 1105 1106 } // resolveEntity(XMLResourceIdentifier):XMLInputSource 1107 1108 /** 1109 * Starts a named entity. 1110 * 1111 * @param reference flag to indicate whether the entity is an Entity Reference. 1112 * @param entityName The name of the entity to start. 1113 * @param literal True if this entity is started within a literal 1114 * value. 1115 * 1116 * @throws IOException Thrown on i/o error. 1117 * @throws XNIException Thrown by entity handler to signal an error. 1118 */ 1119 public void startEntity(boolean reference, String entityName, boolean literal) 1120 throws IOException, XNIException { 1121 1122 // was entity declared? 1123 Entity entity = fEntityStorage.getEntity(entityName); 1124 if (entity == null) { 1125 if (fEntityHandler != null) { 1126 String encoding = null; 1127 fResourceIdentifier.clear(); 1128 fEntityAugs.removeAllItems(); 1129 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1130 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1131 fEntityAugs.removeAllItems(); 1132 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1133 fEntityHandler.endEntity(entityName, fEntityAugs); 1134 } 1135 return; 1136 } 1137 1138 // should we skip external entities? 1139 boolean external = entity.isExternal(); 1140 Entity.ExternalEntity externalEntity = null; 1141 String extLitSysId = null, extBaseSysId = null, expandedSystemId = null; 1142 if (external) { 1143 externalEntity = (Entity.ExternalEntity)entity; 1144 extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null); 1145 extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); 1146 expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); 1147 boolean unparsed = entity.isUnparsed(); 1148 boolean parameter = entityName.startsWith("%"); 1149 boolean general = !parameter; 1150 if (unparsed || (general && !fExternalGeneralEntities) || 1151 (parameter && !fExternalParameterEntities) || 1152 !fSupportDTD || !fSupportExternalEntities) { 1153 1154 if (fEntityHandler != null) { 1155 fResourceIdentifier.clear(); 1156 final String encoding = null; 1157 fResourceIdentifier.setValues( 1158 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1159 extLitSysId, extBaseSysId, expandedSystemId); 1160 fEntityAugs.removeAllItems(); 1161 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1162 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1163 fEntityAugs.removeAllItems(); 1164 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1165 fEntityHandler.endEntity(entityName, fEntityAugs); 1166 } 1167 return; 1168 } 1169 } 1170 1171 // is entity recursive? 1172 int size = fEntityStack.size(); 1173 for (int i = size; i >= 0; i--) { 1174 Entity activeEntity = i == size 1175 ? fCurrentEntity 1176 : (Entity)fEntityStack.elementAt(i); 1177 if (activeEntity.name == entityName) { 1178 String path = entityName; 1179 for (int j = i + 1; j < size; j++) { 1180 activeEntity = (Entity)fEntityStack.elementAt(j); 1181 path = path + " -> " + activeEntity.name; 1182 } 1183 path = path + " -> " + fCurrentEntity.name; 1184 path = path + " -> " + entityName; 1185 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1186 "RecursiveReference", 1187 new Object[] { entityName, path }, 1188 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1189 1190 if (fEntityHandler != null) { 1191 fResourceIdentifier.clear(); 1192 final String encoding = null; 1193 if (external) { 1194 fResourceIdentifier.setValues( 1195 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1196 extLitSysId, extBaseSysId, expandedSystemId); 1197 } 1198 fEntityAugs.removeAllItems(); 1199 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1200 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1201 fEntityAugs.removeAllItems(); 1202 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1203 fEntityHandler.endEntity(entityName, fEntityAugs); 1204 } 1205 1206 return; 1207 } 1208 } 1209 1210 // resolve external entity 1211 StaxXMLInputSource staxInputSource = null; 1212 XMLInputSource xmlInputSource = null ; 1213 1214 if (external) { 1215 staxInputSource = resolveEntityAsPerStax(externalEntity.entityLocation); 1216 /** xxx: Waiting from the EG 1217 * //simply return if there was entity resolver registered and application 1218 * //returns either XMLStreamReader or XMLEventReader. 1219 * if(staxInputSource.hasXMLStreamOrXMLEventReader()) return ; 1220 */ 1221 xmlInputSource = staxInputSource.getXMLInputSource() ; 1222 if (!fISCreatedByResolver) { 1223 //let the not-LoadExternalDTD or not-SupportDTD process to handle the situation 1224 if (fLoadExternalDTD) { 1225 String accessError = SecuritySupport.checkAccess(expandedSystemId, fAccessExternalDTD, Constants.ACCESS_EXTERNAL_ALL); 1226 if (accessError != null) { 1227 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1228 "AccessExternalEntity", 1229 new Object[] { SecuritySupport.sanitizePath(expandedSystemId), accessError }, 1230 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1231 } 1232 } 1233 } 1234 } 1235 // wrap internal entity 1236 else { 1237 Entity.InternalEntity internalEntity = (Entity.InternalEntity)entity; 1238 Reader reader = new StringReader(internalEntity.text); 1239 xmlInputSource = new XMLInputSource(null, null, null, reader, null); 1240 } 1241 1242 // start the entity 1243 startEntity(reference, entityName, xmlInputSource, literal, external); 1244 1245 } // startEntity(String,boolean) 1246 1247 /** 1248 * Starts the document entity. The document entity has the "[xml]" 1249 * pseudo-name. 1250 * 1251 * @param xmlInputSource The input source of the document entity. 1252 * 1253 * @throws IOException Thrown on i/o error. 1254 * @throws XNIException Thrown by entity handler to signal an error. 1255 */ 1256 public void startDocumentEntity(XMLInputSource xmlInputSource) 1257 throws IOException, XNIException { 1258 startEntity(false, XMLEntity, xmlInputSource, false, true); 1259 } // startDocumentEntity(XMLInputSource) 1260 1261 //xxx these methods are not required. 1262 /** 1263 * Starts the DTD entity. The DTD entity has the "[dtd]" 1264 * pseudo-name. 1265 * 1266 * @param xmlInputSource The input source of the DTD entity. 1267 * 1268 * @throws IOException Thrown on i/o error. 1269 * @throws XNIException Thrown by entity handler to signal an error. 1270 */ 1271 public void startDTDEntity(XMLInputSource xmlInputSource) 1272 throws IOException, XNIException { 1273 startEntity(false, DTDEntity, xmlInputSource, false, true); 1274 } // startDTDEntity(XMLInputSource) 1275 1276 // indicate start of external subset so that 1277 // location of entity decls can be tracked 1278 public void startExternalSubset() { 1279 fInExternalSubset = true; 1280 } 1281 1282 public void endExternalSubset() { 1283 fInExternalSubset = false; 1284 } 1285 1286 /** 1287 * Starts an entity. 1288 * <p> 1289 * This method can be used to insert an application defined XML 1290 * entity stream into the parsing stream. 1291 * 1292 * @param reference flag to indicate whether the entity is an Entity Reference. 1293 * @param name The name of the entity. 1294 * @param xmlInputSource The input source of the entity. 1295 * @param literal True if this entity is started within a 1296 * literal value. 1297 * @param isExternal whether this entity should be treated as an internal or external entity. 1298 * 1299 * @throws IOException Thrown on i/o error. 1300 * @throws XNIException Thrown by entity handler to signal an error. 1301 */ 1302 public void startEntity(boolean reference, String name, 1303 XMLInputSource xmlInputSource, 1304 boolean literal, boolean isExternal) 1305 throws IOException, XNIException { 1306 1307 String encoding = setupCurrentEntity(reference, name, xmlInputSource, literal, isExternal); 1308 1309 //when entity expansion limit is set by the Application, we need to 1310 //check for the entity expansion limit set by the parser, if number of entity 1311 //expansions exceeds the entity expansion limit, parser will throw fatal error. 1312 // Note that this represents the nesting level of open entities. 1313 fEntityExpansionCount++; 1314 if(fLimitAnalyzer != null) { 1315 fLimitAnalyzer.addValue(entityExpansionIndex, name, 1); 1316 } 1317 if( fSecurityManager != null && fSecurityManager.isOverLimit(entityExpansionIndex, fLimitAnalyzer)){ 1318 fSecurityManager.debugPrint(fLimitAnalyzer); 1319 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,"EntityExpansionLimit", 1320 new Object[]{fSecurityManager.getLimitValueByIndex(entityExpansionIndex)}, 1321 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1322 // is there anything better to do than reset the counter? 1323 // at least one can envision debugging applications where this might 1324 // be useful... 1325 fEntityExpansionCount = 0; 1326 } 1327 1328 // call handler 1329 if (fEntityHandler != null) { 1330 fEntityHandler.startEntity(name, fResourceIdentifier, encoding, null); 1331 } 1332 1333 } // startEntity(String,XMLInputSource) 1334 1335 /** 1336 * Return the current entity being scanned. Current entity is SET using startEntity function. 1337 * @return Entity.ScannedEntity 1338 */ 1339 1340 public Entity.ScannedEntity getCurrentEntity(){ 1341 return fCurrentEntity ; 1342 } 1343 1344 /** 1345 * Return the top level entity handled by this manager, or null 1346 * if no entity was added. 1347 */ 1348 public Entity.ScannedEntity getTopLevelEntity() { 1349 return (Entity.ScannedEntity) 1350 (fEntityStack.empty() ? null : fEntityStack.elementAt(0)); 1351 } 1352 1353 1354 /** 1355 * Close all opened InputStreams and Readers opened by this parser. 1356 */ 1357 public void closeReaders() { 1358 /** this call actually does nothing, readers are closed in the endEntity method 1359 * through the current entity. 1360 * The change seems to have happened during the jdk6 development with the 1361 * addition of StAX 1362 **/ 1363 } 1364 1365 public void endEntity() throws IOException, XNIException { 1366 1367 // call handler 1368 if (DEBUG_BUFFER) { 1369 System.out.print("(endEntity: "); 1370 print(); 1371 System.out.println(); 1372 } 1373 //pop the entity from the stack 1374 Entity.ScannedEntity entity = fEntityStack.size() > 0 ? (Entity.ScannedEntity)fEntityStack.pop() : null ; 1375 1376 /** need to close the reader first since the program can end 1377 * prematurely (e.g. fEntityHandler.endEntity may throw exception) 1378 * leaving the reader open 1379 */ 1380 //close the reader 1381 if(fCurrentEntity != null){ 1382 //close the reader 1383 try{ 1384 if (fLimitAnalyzer != null) { 1385 fLimitAnalyzer.endEntity(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fCurrentEntity.name); 1386 if (fCurrentEntity.name.equals("[xml]")) { 1387 fSecurityManager.debugPrint(fLimitAnalyzer); 1388 } 1389 } 1390 fCurrentEntity.close(); 1391 }catch(IOException ex){ 1392 throw new XNIException(ex); 1393 } 1394 } 1395 1396 if (fEntityHandler != null) { 1397 //so this is the last opened entity, signal it to current fEntityHandler using Augmentation 1398 if(entity == null){ 1399 fEntityAugs.removeAllItems(); 1400 fEntityAugs.putItem(Constants.LAST_ENTITY, Boolean.TRUE); 1401 fEntityHandler.endEntity(fCurrentEntity.name, fEntityAugs); 1402 fEntityAugs.removeAllItems(); 1403 }else{ 1404 fEntityHandler.endEntity(fCurrentEntity.name, null); 1405 } 1406 } 1407 //check if it is a document entity 1408 boolean documentEntity = fCurrentEntity.name == XMLEntity; 1409 1410 //set popped entity as current entity 1411 fCurrentEntity = entity; 1412 fEntityScanner.setCurrentEntity(fCurrentEntity); 1413 1414 //check if there are any entity left in the stack -- if there are 1415 //no entries EOF has been reached. 1416 // throw exception when it is the last entity but it is not a document entity 1417 1418 if(fCurrentEntity == null & !documentEntity){ 1419 throw new EOFException() ; 1420 } 1421 1422 if (DEBUG_BUFFER) { 1423 System.out.print(")endEntity: "); 1424 print(); 1425 System.out.println(); 1426 } 1427 1428 } // endEntity() 1429 1430 1431 // 1432 // XMLComponent methods 1433 // 1434 public void reset(PropertyManager propertyManager){ 1435 // xerces properties 1436 fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY); 1437 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY); 1438 try { 1439 fStaxEntityResolver = (StaxEntityResolverWrapper)propertyManager.getProperty(STAX_ENTITY_RESOLVER); 1440 } catch (XMLConfigurationException e) { 1441 fStaxEntityResolver = null; 1442 } 1443 1444 fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); 1445 fReplaceEntityReferences = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES)).booleanValue(); 1446 fSupportExternalEntities = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES)).booleanValue(); 1447 1448 // Zephyr feature ignore-external-dtd is the opposite of Xerces' load-external-dtd 1449 fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue(); 1450 1451 // JAXP 1.5 feature 1452 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 1453 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1454 1455 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER); 1456 1457 fLimitAnalyzer = new XMLLimitAnalyzer(); 1458 //reset fEntityStorage 1459 fEntityStorage.reset(propertyManager); 1460 //reset XMLEntityReaderImpl 1461 fEntityScanner.reset(propertyManager); 1462 1463 // initialize state 1464 //fStandalone = false; 1465 fEntities.clear(); 1466 fEntityStack.removeAllElements(); 1467 fCurrentEntity = null; 1468 fValidation = false; 1469 fExternalGeneralEntities = true; 1470 fExternalParameterEntities = true; 1471 fAllowJavaEncodings = true ; 1472 } 1473 1474 /** 1475 * Resets the component. The component can query the component manager 1476 * about any features and properties that affect the operation of the 1477 * component. 1478 * 1479 * @param componentManager The component manager. 1480 * 1481 * @throws SAXException Thrown by component on initialization error. 1482 * For example, if a feature or property is 1483 * required for the operation of the component, the 1484 * component manager may throw a 1485 * SAXNotRecognizedException or a 1486 * SAXNotSupportedException. 1487 */ 1488 public void reset(XMLComponentManager componentManager) 1489 throws XMLConfigurationException { 1490 1491 boolean parser_settings = componentManager.getFeature(PARSER_SETTINGS, true); 1492 1493 if (!parser_settings) { 1494 // parser settings have not been changed 1495 reset(); 1496 if(fEntityScanner != null){ 1497 fEntityScanner.reset(componentManager); 1498 } 1499 if(fEntityStorage != null){ 1500 fEntityStorage.reset(componentManager); 1501 } 1502 return; 1503 } 1504 1505 // sax features 1506 fValidation = componentManager.getFeature(VALIDATION, false); 1507 fExternalGeneralEntities = componentManager.getFeature(EXTERNAL_GENERAL_ENTITIES, true); 1508 fExternalParameterEntities = componentManager.getFeature(EXTERNAL_PARAMETER_ENTITIES, true); 1509 1510 // xerces features 1511 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 1512 fWarnDuplicateEntityDef = componentManager.getFeature(WARN_ON_DUPLICATE_ENTITYDEF, false); 1513 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 1514 fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true); 1515 1516 // xerces properties 1517 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 1518 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 1519 fEntityResolver = (XMLEntityResolver)componentManager.getProperty(ENTITY_RESOLVER, null); 1520 fStaxEntityResolver = (StaxEntityResolverWrapper)componentManager.getProperty(STAX_ENTITY_RESOLVER, null); 1521 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 1522 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER, null); 1523 entityExpansionIndex = fSecurityManager.getIndex(Constants.JDK_ENTITY_EXPANSION_LIMIT); 1524 1525 //StAX Property 1526 fSupportDTD = true; 1527 fReplaceEntityReferences = true; 1528 fSupportExternalEntities = true; 1529 1530 // JAXP 1.5 feature 1531 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 1532 if (spm == null) { 1533 spm = new XMLSecurityPropertyManager(); 1534 } 1535 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1536 1537 //reset general state 1538 reset(); 1539 1540 fEntityScanner.reset(componentManager); 1541 fEntityStorage.reset(componentManager); 1542 1543 } // reset(XMLComponentManager) 1544 1545 // reset general state. Should not be called other than by 1546 // a class acting as a component manager but not 1547 // implementing that interface for whatever reason. 1548 public void reset() { 1549 fLimitAnalyzer = new XMLLimitAnalyzer(); 1550 // initialize state 1551 fStandalone = false; 1552 fEntities.clear(); 1553 fEntityStack.removeAllElements(); 1554 fEntityExpansionCount = 0; 1555 1556 fCurrentEntity = null; 1557 // reset scanner 1558 if(fXML10EntityScanner != null){ 1559 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1560 } 1561 if(fXML11EntityScanner != null) { 1562 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1563 } 1564 1565 // DEBUG 1566 if (DEBUG_ENTITIES) { 1567 addInternalEntity("text", "Hello, World."); 1568 addInternalEntity("empty-element", "<foo/>"); 1569 addInternalEntity("balanced-element", "<foo></foo>"); 1570 addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 1571 addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 1572 addInternalEntity("unbalanced-entity", "<foo>"); 1573 addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 1574 addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 1575 addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 1576 try { 1577 addExternalEntity("external-text", null, "external-text.ent", "test/external-text.xml"); 1578 addExternalEntity("external-balanced-element", null, "external-balanced-element.ent", "test/external-balanced-element.xml"); 1579 addExternalEntity("one", null, "ent/one.ent", "test/external-entity.xml"); 1580 addExternalEntity("two", null, "ent/two.ent", "test/ent/one.xml"); 1581 } 1582 catch (IOException ex) { 1583 // should never happen 1584 } 1585 } 1586 1587 fEntityHandler = null; 1588 1589 // reset scanner 1590 //if(fEntityScanner!=null) 1591 // fEntityScanner.reset(fSymbolTable, this,fErrorReporter); 1592 1593 } 1594 /** 1595 * Returns a list of feature identifiers that are recognized by 1596 * this component. This method may return null if no features 1597 * are recognized by this component. 1598 */ 1599 public String[] getRecognizedFeatures() { 1600 return (String[])(RECOGNIZED_FEATURES.clone()); 1601 } // getRecognizedFeatures():String[] 1602 1603 /** 1604 * Sets the state of a feature. This method is called by the component 1605 * manager any time after reset when a feature changes state. 1606 * <p> 1607 * <strong>Note:</strong> Components should silently ignore features 1608 * that do not affect the operation of the component. 1609 * 1610 * @param featureId The feature identifier. 1611 * @param state The state of the feature. 1612 * 1613 * @throws SAXNotRecognizedException The component should not throw 1614 * this exception. 1615 * @throws SAXNotSupportedException The component should not throw 1616 * this exception. 1617 */ 1618 public void setFeature(String featureId, boolean state) 1619 throws XMLConfigurationException { 1620 1621 // xerces features 1622 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 1623 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 1624 if (suffixLength == Constants.ALLOW_JAVA_ENCODINGS_FEATURE.length() && 1625 featureId.endsWith(Constants.ALLOW_JAVA_ENCODINGS_FEATURE)) { 1626 fAllowJavaEncodings = state; 1627 } 1628 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 1629 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 1630 fLoadExternalDTD = state; 1631 return; 1632 } 1633 } 1634 1635 } // setFeature(String,boolean) 1636 1637 /** 1638 * Sets the value of a property. This method is called by the component 1639 * manager any time after reset when a property changes value. 1640 * <p> 1641 * <strong>Note:</strong> Components should silently ignore properties 1642 * that do not affect the operation of the component. 1643 * 1644 * @param propertyId The property identifier. 1645 * @param value The value of the property. 1646 * 1647 * @throws SAXNotRecognizedException The component should not throw 1648 * this exception. 1649 * @throws SAXNotSupportedException The component should not throw 1650 * this exception. 1651 */ 1652 public void setProperty(String propertyId, Object value){ 1653 // Xerces properties 1654 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 1655 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 1656 1657 if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() && 1658 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) { 1659 fSymbolTable = (SymbolTable)value; 1660 return; 1661 } 1662 if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() && 1663 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) { 1664 fErrorReporter = (XMLErrorReporter)value; 1665 return; 1666 } 1667 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 1668 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 1669 fEntityResolver = (XMLEntityResolver)value; 1670 return; 1671 } 1672 if (suffixLength == Constants.BUFFER_SIZE_PROPERTY.length() && 1673 propertyId.endsWith(Constants.BUFFER_SIZE_PROPERTY)) { 1674 Integer bufferSize = (Integer)value; 1675 if (bufferSize != null && 1676 bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) { 1677 fBufferSize = bufferSize.intValue(); 1678 fEntityScanner.setBufferSize(fBufferSize); 1679 fBufferPool.setExternalBufferSize(fBufferSize); 1680 } 1681 } 1682 if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() && 1683 propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) { 1684 fSecurityManager = (XMLSecurityManager)value; 1685 } 1686 } 1687 1688 //JAXP 1.5 properties 1689 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 1690 { 1691 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 1692 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1693 } 1694 } 1695 1696 public void setLimitAnalyzer(XMLLimitAnalyzer fLimitAnalyzer) { 1697 this.fLimitAnalyzer = fLimitAnalyzer; 1698 } 1699 1700 /** 1701 * Returns a list of property identifiers that are recognized by 1702 * this component. This method may return null if no properties 1703 * are recognized by this component. 1704 */ 1705 public String[] getRecognizedProperties() { 1706 return (String[])(RECOGNIZED_PROPERTIES.clone()); 1707 } // getRecognizedProperties():String[] 1708 /** 1709 * Returns the default state for a feature, or null if this 1710 * component does not want to report a default value for this 1711 * feature. 1712 * 1713 * @param featureId The feature identifier. 1714 * 1715 * @since Xerces 2.2.0 1716 */ 1717 public Boolean getFeatureDefault(String featureId) { 1718 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 1719 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 1720 return FEATURE_DEFAULTS[i]; 1721 } 1722 } 1723 return null; 1724 } // getFeatureDefault(String):Boolean 1725 1726 /** 1727 * Returns the default state for a property, or null if this 1728 * component does not want to report a default value for this 1729 * property. 1730 * 1731 * @param propertyId The property identifier. 1732 * 1733 * @since Xerces 2.2.0 1734 */ 1735 public Object getPropertyDefault(String propertyId) { 1736 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 1737 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 1738 return PROPERTY_DEFAULTS[i]; 1739 } 1740 } 1741 return null; 1742 } // getPropertyDefault(String):Object 1743 1744 // 1745 // Public static methods 1746 // 1747 1748 /** 1749 * Expands a system id and returns the system id as a URI, if 1750 * it can be expanded. A return value of null means that the 1751 * identifier is already expanded. An exception thrown 1752 * indicates a failure to expand the id. 1753 * 1754 * @param systemId The systemId to be expanded. 1755 * 1756 * @return Returns the URI string representing the expanded system 1757 * identifier. A null value indicates that the given 1758 * system identifier is already expanded. 1759 * 1760 */ 1761 public static String expandSystemId(String systemId) { 1762 return expandSystemId(systemId, null); 1763 } // expandSystemId(String):String 1764 1765 // 1766 // Public static methods 1767 // 1768 1769 // current value of the "user.dir" property 1770 private static String gUserDir; 1771 // cached URI object for the current value of the escaped "user.dir" property stored as a URI 1772 private static URI gUserDirURI; 1773 // which ASCII characters need to be escaped 1774 private static boolean gNeedEscaping[] = new boolean[128]; 1775 // the first hex character if a character needs to be escaped 1776 private static char gAfterEscaping1[] = new char[128]; 1777 // the second hex character if a character needs to be escaped 1778 private static char gAfterEscaping2[] = new char[128]; 1779 private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7', 1780 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; 1781 // initialize the above 3 arrays 1782 static { 1783 for (int i = 0; i <= 0x1f; i++) { 1784 gNeedEscaping[i] = true; 1785 gAfterEscaping1[i] = gHexChs[i >> 4]; 1786 gAfterEscaping2[i] = gHexChs[i & 0xf]; 1787 } 1788 gNeedEscaping[0x7f] = true; 1789 gAfterEscaping1[0x7f] = '7'; 1790 gAfterEscaping2[0x7f] = 'F'; 1791 char[] escChs = {' ', '<', '>', '#', '%', '"', '{', '}', 1792 '|', '\\', '^', '~', '[', ']', '`'}; 1793 int len = escChs.length; 1794 char ch; 1795 for (int i = 0; i < len; i++) { 1796 ch = escChs[i]; 1797 gNeedEscaping[ch] = true; 1798 gAfterEscaping1[ch] = gHexChs[ch >> 4]; 1799 gAfterEscaping2[ch] = gHexChs[ch & 0xf]; 1800 } 1801 } 1802 1803 // To escape the "user.dir" system property, by using %HH to represent 1804 // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', '#', '%' 1805 // and '"'. It's a static method, so needs to be synchronized. 1806 // this method looks heavy, but since the system property isn't expected 1807 // to change often, so in most cases, we only need to return the URI 1808 // that was escaped before. 1809 // According to the URI spec, non-ASCII characters (whose value >= 128) 1810 // need to be escaped too. 1811 // REVISIT: don't know how to escape non-ASCII characters, especially 1812 // which encoding to use. Leave them for now. 1813 private static synchronized URI getUserDir() throws URI.MalformedURIException { 1814 // get the user.dir property 1815 String userDir = ""; 1816 try { 1817 userDir = SecuritySupport.getSystemProperty("user.dir"); 1818 } 1819 catch (SecurityException se) { 1820 } 1821 1822 // return empty string if property value is empty string. 1823 if (userDir.length() == 0) 1824 return new URI("file", "", "", null, null); 1825 // compute the new escaped value if the new property value doesn't 1826 // match the previous one 1827 if (gUserDirURI != null && userDir.equals(gUserDir)) { 1828 return gUserDirURI; 1829 } 1830 1831 // record the new value as the global property value 1832 gUserDir = userDir; 1833 1834 char separator = java.io.File.separatorChar; 1835 userDir = userDir.replace(separator, '/'); 1836 1837 int len = userDir.length(), ch; 1838 StringBuilder buffer = new StringBuilder(len*3); 1839 // change C:/blah to /C:/blah 1840 if (len >= 2 && userDir.charAt(1) == ':') { 1841 ch = Character.toUpperCase(userDir.charAt(0)); 1842 if (ch >= 'A' && ch <= 'Z') { 1843 buffer.append('/'); 1844 } 1845 } 1846 1847 // for each character in the path 1848 int i = 0; 1849 for (; i < len; i++) { 1850 ch = userDir.charAt(i); 1851 // if it's not an ASCII character, break here, and use UTF-8 encoding 1852 if (ch >= 128) 1853 break; 1854 if (gNeedEscaping[ch]) { 1855 buffer.append('%'); 1856 buffer.append(gAfterEscaping1[ch]); 1857 buffer.append(gAfterEscaping2[ch]); 1858 // record the fact that it's escaped 1859 } 1860 else { 1861 buffer.append((char)ch); 1862 } 1863 } 1864 1865 // we saw some non-ascii character 1866 if (i < len) { 1867 // get UTF-8 bytes for the remaining sub-string 1868 byte[] bytes = null; 1869 byte b; 1870 try { 1871 bytes = userDir.substring(i).getBytes("UTF-8"); 1872 } catch (java.io.UnsupportedEncodingException e) { 1873 // should never happen 1874 return new URI("file", "", userDir, null, null); 1875 } 1876 len = bytes.length; 1877 1878 // for each byte 1879 for (i = 0; i < len; i++) { 1880 b = bytes[i]; 1881 // for non-ascii character: make it positive, then escape 1882 if (b < 0) { 1883 ch = b + 256; 1884 buffer.append('%'); 1885 buffer.append(gHexChs[ch >> 4]); 1886 buffer.append(gHexChs[ch & 0xf]); 1887 } 1888 else if (gNeedEscaping[b]) { 1889 buffer.append('%'); 1890 buffer.append(gAfterEscaping1[b]); 1891 buffer.append(gAfterEscaping2[b]); 1892 } 1893 else { 1894 buffer.append((char)b); 1895 } 1896 } 1897 } 1898 1899 // change blah/blah to blah/blah/ 1900 if (!userDir.endsWith("/")) 1901 buffer.append('/'); 1902 1903 gUserDirURI = new URI("file", "", buffer.toString(), null, null); 1904 1905 return gUserDirURI; 1906 } 1907 1908 public static OutputStream createOutputStream(String uri) throws IOException { 1909 // URI was specified. Handle relative URIs. 1910 final String expanded = XMLEntityManager.expandSystemId(uri, null, true); 1911 final URL url = new URL(expanded != null ? expanded : uri); 1912 OutputStream out = null; 1913 String protocol = url.getProtocol(); 1914 String host = url.getHost(); 1915 // Use FileOutputStream if this URI is for a local file. 1916 if (protocol.equals("file") 1917 && (host == null || host.length() == 0 || host.equals("localhost"))) { 1918 File file = new File(getPathWithoutEscapes(url.getPath())); 1919 if (!file.exists()) { 1920 File parent = file.getParentFile(); 1921 if (parent != null && !parent.exists()) { 1922 parent.mkdirs(); 1923 } 1924 } 1925 out = new FileOutputStream(file); 1926 } 1927 // Try to write to some other kind of URI. Some protocols 1928 // won't support this, though HTTP should work. 1929 else { 1930 URLConnection urlCon = url.openConnection(); 1931 urlCon.setDoInput(false); 1932 urlCon.setDoOutput(true); 1933 urlCon.setUseCaches(false); // Enable tunneling. 1934 if (urlCon instanceof HttpURLConnection) { 1935 // The DOM L3 REC says if we are writing to an HTTP URI 1936 // it is to be done with an HTTP PUT. 1937 HttpURLConnection httpCon = (HttpURLConnection) urlCon; 1938 httpCon.setRequestMethod("PUT"); 1939 } 1940 out = urlCon.getOutputStream(); 1941 } 1942 return out; 1943 } 1944 1945 private static String getPathWithoutEscapes(String origPath) { 1946 if (origPath != null && origPath.length() != 0 && origPath.indexOf('%') != -1) { 1947 // Locate the escape characters 1948 StringTokenizer tokenizer = new StringTokenizer(origPath, "%"); 1949 StringBuilder result = new StringBuilder(origPath.length()); 1950 int size = tokenizer.countTokens(); 1951 result.append(tokenizer.nextToken()); 1952 for(int i = 1; i < size; ++i) { 1953 String token = tokenizer.nextToken(); 1954 // Decode the 2 digit hexadecimal number following % in '%nn' 1955 result.append((char)Integer.valueOf(token.substring(0, 2), 16).intValue()); 1956 result.append(token.substring(2)); 1957 } 1958 return result.toString(); 1959 } 1960 return origPath; 1961 } 1962 1963 /** 1964 * Absolutizes a URI using the current value 1965 * of the "user.dir" property as the base URI. If 1966 * the URI is already absolute, this is a no-op. 1967 * 1968 * @param uri the URI to absolutize 1969 */ 1970 public static void absolutizeAgainstUserDir(URI uri) 1971 throws URI.MalformedURIException { 1972 uri.absolutize(getUserDir()); 1973 } 1974 1975 /** 1976 * Expands a system id and returns the system id as a URI, if 1977 * it can be expanded. A return value of null means that the 1978 * identifier is already expanded. An exception thrown 1979 * indicates a failure to expand the id. 1980 * 1981 * @param systemId The systemId to be expanded. 1982 * 1983 * @return Returns the URI string representing the expanded system 1984 * identifier. A null value indicates that the given 1985 * system identifier is already expanded. 1986 * 1987 */ 1988 public static String expandSystemId(String systemId, String baseSystemId) { 1989 1990 // check for bad parameters id 1991 if (systemId == null || systemId.length() == 0) { 1992 return systemId; 1993 } 1994 // if id already expanded, return 1995 try { 1996 URI uri = new URI(systemId); 1997 if (uri != null) { 1998 return systemId; 1999 } 2000 } catch (URI.MalformedURIException e) { 2001 // continue on... 2002 } 2003 // normalize id 2004 String id = fixURI(systemId); 2005 2006 // normalize base 2007 URI base = null; 2008 URI uri = null; 2009 try { 2010 if (baseSystemId == null || baseSystemId.length() == 0 || 2011 baseSystemId.equals(systemId)) { 2012 String dir = getUserDir().toString(); 2013 base = new URI("file", "", dir, null, null); 2014 } else { 2015 try { 2016 base = new URI(fixURI(baseSystemId)); 2017 } catch (URI.MalformedURIException e) { 2018 if (baseSystemId.indexOf(':') != -1) { 2019 // for xml schemas we might have baseURI with 2020 // a specified drive 2021 base = new URI("file", "", fixURI(baseSystemId), null, null); 2022 } else { 2023 String dir = getUserDir().toString(); 2024 dir = dir + fixURI(baseSystemId); 2025 base = new URI("file", "", dir, null, null); 2026 } 2027 } 2028 } 2029 // expand id 2030 uri = new URI(base, id); 2031 } catch (Exception e) { 2032 // let it go through 2033 2034 } 2035 2036 if (uri == null) { 2037 return systemId; 2038 } 2039 return uri.toString(); 2040 2041 } // expandSystemId(String,String):String 2042 2043 /** 2044 * Expands a system id and returns the system id as a URI, if 2045 * it can be expanded. A return value of null means that the 2046 * identifier is already expanded. An exception thrown 2047 * indicates a failure to expand the id. 2048 * 2049 * @param systemId The systemId to be expanded. 2050 * 2051 * @return Returns the URI string representing the expanded system 2052 * identifier. A null value indicates that the given 2053 * system identifier is already expanded. 2054 * 2055 */ 2056 public static String expandSystemId(String systemId, String baseSystemId, 2057 boolean strict) 2058 throws URI.MalformedURIException { 2059 2060 // check if there is a system id before 2061 // trying to expand it. 2062 if (systemId == null) { 2063 return null; 2064 } 2065 2066 // system id has to be a valid URI 2067 if (strict) { 2068 2069 2070 // check if there is a system id before 2071 // trying to expand it. 2072 if (systemId == null) { 2073 return null; 2074 } 2075 2076 try { 2077 // if it's already an absolute one, return it 2078 new URI(systemId); 2079 return systemId; 2080 } 2081 catch (URI.MalformedURIException ex) { 2082 } 2083 URI base = null; 2084 // if there isn't a base uri, use the working directory 2085 if (baseSystemId == null || baseSystemId.length() == 0) { 2086 base = new URI("file", "", getUserDir().toString(), null, null); 2087 } 2088 // otherwise, use the base uri 2089 else { 2090 try { 2091 base = new URI(baseSystemId); 2092 } 2093 catch (URI.MalformedURIException e) { 2094 // assume "base" is also a relative uri 2095 String dir = getUserDir().toString(); 2096 dir = dir + baseSystemId; 2097 base = new URI("file", "", dir, null, null); 2098 } 2099 } 2100 // absolutize the system id using the base 2101 URI uri = new URI(base, systemId); 2102 // return the string rep of the new uri (an absolute one) 2103 return uri.toString(); 2104 2105 // if any exception is thrown, it'll get thrown to the caller. 2106 } 2107 2108 // Assume the URIs are well-formed. If it turns out they're not, try fixing them up. 2109 try { 2110 return expandSystemIdStrictOff(systemId, baseSystemId); 2111 } 2112 catch (URI.MalformedURIException e) { 2113 /** Xerces URI rejects unicode, try java.net.URI 2114 * this is not ideal solution, but it covers known cases which either 2115 * Xerces URI or java.net.URI can handle alone 2116 * will file bug against java.net.URI 2117 */ 2118 try { 2119 return expandSystemIdStrictOff1(systemId, baseSystemId); 2120 } catch (URISyntaxException ex) { 2121 // continue on... 2122 } 2123 } 2124 // check for bad parameters id 2125 if (systemId.length() == 0) { 2126 return systemId; 2127 } 2128 2129 // normalize id 2130 String id = fixURI(systemId); 2131 2132 // normalize base 2133 URI base = null; 2134 URI uri = null; 2135 try { 2136 if (baseSystemId == null || baseSystemId.length() == 0 || 2137 baseSystemId.equals(systemId)) { 2138 base = getUserDir(); 2139 } 2140 else { 2141 try { 2142 base = new URI(fixURI(baseSystemId).trim()); 2143 } 2144 catch (URI.MalformedURIException e) { 2145 if (baseSystemId.indexOf(':') != -1) { 2146 // for xml schemas we might have baseURI with 2147 // a specified drive 2148 base = new URI("file", "", fixURI(baseSystemId).trim(), null, null); 2149 } 2150 else { 2151 base = new URI(getUserDir(), fixURI(baseSystemId)); 2152 } 2153 } 2154 } 2155 // expand id 2156 uri = new URI(base, id.trim()); 2157 } 2158 catch (Exception e) { 2159 // let it go through 2160 2161 } 2162 2163 if (uri == null) { 2164 return systemId; 2165 } 2166 return uri.toString(); 2167 2168 } // expandSystemId(String,String,boolean):String 2169 2170 /** 2171 * Helper method for expandSystemId(String,String,boolean):String 2172 */ 2173 private static String expandSystemIdStrictOn(String systemId, String baseSystemId) 2174 throws URI.MalformedURIException { 2175 2176 URI systemURI = new URI(systemId, true); 2177 // If it's already an absolute one, return it 2178 if (systemURI.isAbsoluteURI()) { 2179 return systemId; 2180 } 2181 2182 // If there isn't a base URI, use the working directory 2183 URI baseURI = null; 2184 if (baseSystemId == null || baseSystemId.length() == 0) { 2185 baseURI = getUserDir(); 2186 } 2187 else { 2188 baseURI = new URI(baseSystemId, true); 2189 if (!baseURI.isAbsoluteURI()) { 2190 // assume "base" is also a relative uri 2191 baseURI.absolutize(getUserDir()); 2192 } 2193 } 2194 2195 // absolutize the system identifier using the base URI 2196 systemURI.absolutize(baseURI); 2197 2198 // return the string rep of the new uri (an absolute one) 2199 return systemURI.toString(); 2200 2201 // if any exception is thrown, it'll get thrown to the caller. 2202 2203 } // expandSystemIdStrictOn(String,String):String 2204 2205 /** 2206 * Helper method for expandSystemId(String,String,boolean):String 2207 */ 2208 private static String expandSystemIdStrictOff(String systemId, String baseSystemId) 2209 throws URI.MalformedURIException { 2210 2211 URI systemURI = new URI(systemId, true); 2212 // If it's already an absolute one, return it 2213 if (systemURI.isAbsoluteURI()) { 2214 if (systemURI.getScheme().length() > 1) { 2215 return systemId; 2216 } 2217 /** 2218 * If the scheme's length is only one character, 2219 * it's likely that this was intended as a file 2220 * path. Fixing this up in expandSystemId to 2221 * maintain backwards compatibility. 2222 */ 2223 throw new URI.MalformedURIException(); 2224 } 2225 2226 // If there isn't a base URI, use the working directory 2227 URI baseURI = null; 2228 if (baseSystemId == null || baseSystemId.length() == 0) { 2229 baseURI = getUserDir(); 2230 } 2231 else { 2232 baseURI = new URI(baseSystemId, true); 2233 if (!baseURI.isAbsoluteURI()) { 2234 // assume "base" is also a relative uri 2235 baseURI.absolutize(getUserDir()); 2236 } 2237 } 2238 2239 // absolutize the system identifier using the base URI 2240 systemURI.absolutize(baseURI); 2241 2242 // return the string rep of the new uri (an absolute one) 2243 return systemURI.toString(); 2244 2245 // if any exception is thrown, it'll get thrown to the caller. 2246 2247 } // expandSystemIdStrictOff(String,String):String 2248 2249 private static String expandSystemIdStrictOff1(String systemId, String baseSystemId) 2250 throws URISyntaxException, URI.MalformedURIException { 2251 2252 java.net.URI systemURI = new java.net.URI(systemId); 2253 // If it's already an absolute one, return it 2254 if (systemURI.isAbsolute()) { 2255 if (systemURI.getScheme().length() > 1) { 2256 return systemId; 2257 } 2258 /** 2259 * If the scheme's length is only one character, 2260 * it's likely that this was intended as a file 2261 * path. Fixing this up in expandSystemId to 2262 * maintain backwards compatibility. 2263 */ 2264 throw new URISyntaxException(systemId, "the scheme's length is only one character"); 2265 } 2266 2267 // If there isn't a base URI, use the working directory 2268 URI baseURI = null; 2269 if (baseSystemId == null || baseSystemId.length() == 0) { 2270 baseURI = getUserDir(); 2271 } 2272 else { 2273 baseURI = new URI(baseSystemId, true); 2274 if (!baseURI.isAbsoluteURI()) { 2275 // assume "base" is also a relative uri 2276 baseURI.absolutize(getUserDir()); 2277 } 2278 } 2279 2280 // absolutize the system identifier using the base URI 2281 // systemURI.absolutize(baseURI); 2282 systemURI = (new java.net.URI(baseURI.toString())).resolve(systemURI); 2283 2284 // return the string rep of the new uri (an absolute one) 2285 return systemURI.toString(); 2286 2287 // if any exception is thrown, it'll get thrown to the caller. 2288 2289 } // expandSystemIdStrictOff(String,String):String 2290 2291 // 2292 // Protected methods 2293 // 2294 2295 2296 /** 2297 * Returns the IANA encoding name that is auto-detected from 2298 * the bytes specified, with the endian-ness of that encoding where appropriate. 2299 * 2300 * @param b4 The first four bytes of the input. 2301 * @param count The number of bytes actually read. 2302 * @return a 2-element array: the first element, an IANA-encoding string, 2303 * the second element a Boolean which is true iff the document is big endian, false 2304 * if it's little-endian, and null if the distinction isn't relevant. 2305 */ 2306 protected Object[] getEncodingName(byte[] b4, int count) { 2307 2308 if (count < 2) { 2309 return defaultEncoding; 2310 } 2311 2312 // UTF-16, with BOM 2313 int b0 = b4[0] & 0xFF; 2314 int b1 = b4[1] & 0xFF; 2315 if (b0 == 0xFE && b1 == 0xFF) { 2316 // UTF-16, big-endian 2317 return new Object [] {"UTF-16BE", new Boolean(true)}; 2318 } 2319 if (b0 == 0xFF && b1 == 0xFE) { 2320 // UTF-16, little-endian 2321 return new Object [] {"UTF-16LE", new Boolean(false)}; 2322 } 2323 2324 // default to UTF-8 if we don't have enough bytes to make a 2325 // good determination of the encoding 2326 if (count < 3) { 2327 return defaultEncoding; 2328 } 2329 2330 // UTF-8 with a BOM 2331 int b2 = b4[2] & 0xFF; 2332 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 2333 return defaultEncoding; 2334 } 2335 2336 // default to UTF-8 if we don't have enough bytes to make a 2337 // good determination of the encoding 2338 if (count < 4) { 2339 return defaultEncoding; 2340 } 2341 2342 // other encodings 2343 int b3 = b4[3] & 0xFF; 2344 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 2345 // UCS-4, big endian (1234) 2346 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 2347 } 2348 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 2349 // UCS-4, little endian (4321) 2350 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 2351 } 2352 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 2353 // UCS-4, unusual octet order (2143) 2354 // REVISIT: What should this be? 2355 return new Object [] {"ISO-10646-UCS-4", null}; 2356 } 2357 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 2358 // UCS-4, unusual octect order (3412) 2359 // REVISIT: What should this be? 2360 return new Object [] {"ISO-10646-UCS-4", null}; 2361 } 2362 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 2363 // UTF-16, big-endian, no BOM 2364 // (or could turn out to be UCS-2... 2365 // REVISIT: What should this be? 2366 return new Object [] {"UTF-16BE", new Boolean(true)}; 2367 } 2368 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 2369 // UTF-16, little-endian, no BOM 2370 // (or could turn out to be UCS-2... 2371 return new Object [] {"UTF-16LE", new Boolean(false)}; 2372 } 2373 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2374 // EBCDIC 2375 // a la xerces1, return CP037 instead of EBCDIC here 2376 return new Object [] {"CP037", null}; 2377 } 2378 2379 return defaultEncoding; 2380 2381 } // getEncodingName(byte[],int):Object[] 2382 2383 /** 2384 * Creates a reader capable of reading the given input stream in 2385 * the specified encoding. 2386 * 2387 * @param inputStream The input stream. 2388 * @param encoding The encoding name that the input stream is 2389 * encoded using. If the user has specified that 2390 * Java encoding names are allowed, then the 2391 * encoding name may be a Java encoding name; 2392 * otherwise, it is an ianaEncoding name. 2393 * @param isBigEndian For encodings (like uCS-4), whose names cannot 2394 * specify a byte order, this tells whether the order is bigEndian. null menas 2395 * unknown or not relevant. 2396 * 2397 * @return Returns a reader. 2398 */ 2399 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 2400 throws IOException { 2401 2402 // normalize encoding name 2403 if (encoding == null) { 2404 encoding = "UTF-8"; 2405 } 2406 2407 // try to use an optimized reader 2408 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 2409 if (ENCODING.equals("UTF-8")) { 2410 if (DEBUG_ENCODINGS) { 2411 System.out.println("$$$ creating UTF8Reader"); 2412 } 2413 return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 2414 } 2415 if (ENCODING.equals("US-ASCII")) { 2416 if (DEBUG_ENCODINGS) { 2417 System.out.println("$$$ creating ASCIIReader"); 2418 } 2419 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 2420 } 2421 if(ENCODING.equals("ISO-10646-UCS-4")) { 2422 if(isBigEndian != null) { 2423 boolean isBE = isBigEndian.booleanValue(); 2424 if(isBE) { 2425 return new UCSReader(inputStream, UCSReader.UCS4BE); 2426 } else { 2427 return new UCSReader(inputStream, UCSReader.UCS4LE); 2428 } 2429 } else { 2430 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2431 "EncodingByteOrderUnsupported", 2432 new Object[] { encoding }, 2433 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2434 } 2435 } 2436 if(ENCODING.equals("ISO-10646-UCS-2")) { 2437 if(isBigEndian != null) { // sould never happen with this encoding... 2438 boolean isBE = isBigEndian.booleanValue(); 2439 if(isBE) { 2440 return new UCSReader(inputStream, UCSReader.UCS2BE); 2441 } else { 2442 return new UCSReader(inputStream, UCSReader.UCS2LE); 2443 } 2444 } else { 2445 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2446 "EncodingByteOrderUnsupported", 2447 new Object[] { encoding }, 2448 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2449 } 2450 } 2451 2452 // check for valid name 2453 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 2454 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 2455 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 2456 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2457 "EncodingDeclInvalid", 2458 new Object[] { encoding }, 2459 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2460 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 2461 // because every byte is a valid ISO Latin 1 character. 2462 // It may not translate correctly but if we failed on 2463 // the encoding anyway, then we're expecting the content 2464 // of the document to be bad. This will just prevent an 2465 // invalid UTF-8 sequence to be detected. This is only 2466 // important when continue-after-fatal-error is turned 2467 // on. -Ac 2468 encoding = "ISO-8859-1"; 2469 } 2470 2471 // try to use a Java reader 2472 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 2473 if (javaEncoding == null) { 2474 if(fAllowJavaEncodings) { 2475 javaEncoding = encoding; 2476 } else { 2477 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2478 "EncodingDeclInvalid", 2479 new Object[] { encoding }, 2480 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2481 // see comment above. 2482 javaEncoding = "ISO8859_1"; 2483 } 2484 } 2485 if (DEBUG_ENCODINGS) { 2486 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 2487 if (javaEncoding == encoding) { 2488 System.out.print(" (IANA encoding)"); 2489 } 2490 System.out.println(); 2491 } 2492 return new BufferedReader( new InputStreamReader(inputStream, javaEncoding)); 2493 2494 } // createReader(InputStream,String, Boolean): Reader 2495 2496 2497 /** 2498 * Return the public identifier for the current document event. 2499 * <p> 2500 * The return value is the public identifier of the document 2501 * entity or of the external parsed entity in which the markup 2502 * triggering the event appears. 2503 * 2504 * @return A string containing the public identifier, or 2505 * null if none is available. 2506 */ 2507 public String getPublicId() { 2508 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; 2509 } // getPublicId():String 2510 2511 /** 2512 * Return the expanded system identifier for the current document event. 2513 * <p> 2514 * The return value is the expanded system identifier of the document 2515 * entity or of the external parsed entity in which the markup 2516 * triggering the event appears. 2517 * <p> 2518 * If the system identifier is a URL, the parser must resolve it 2519 * fully before passing it to the application. 2520 * 2521 * @return A string containing the expanded system identifier, or null 2522 * if none is available. 2523 */ 2524 public String getExpandedSystemId() { 2525 if (fCurrentEntity != null) { 2526 if (fCurrentEntity.entityLocation != null && 2527 fCurrentEntity.entityLocation.getExpandedSystemId() != null ) { 2528 return fCurrentEntity.entityLocation.getExpandedSystemId(); 2529 } else { 2530 // search for the first external entity on the stack 2531 int size = fEntityStack.size(); 2532 for (int i = size - 1; i >= 0 ; i--) { 2533 Entity.ScannedEntity externalEntity = 2534 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2535 2536 if (externalEntity.entityLocation != null && 2537 externalEntity.entityLocation.getExpandedSystemId() != null) { 2538 return externalEntity.entityLocation.getExpandedSystemId(); 2539 } 2540 } 2541 } 2542 } 2543 return null; 2544 } // getExpandedSystemId():String 2545 2546 /** 2547 * Return the literal system identifier for the current document event. 2548 * <p> 2549 * The return value is the literal system identifier of the document 2550 * entity or of the external parsed entity in which the markup 2551 * triggering the event appears. 2552 * <p> 2553 * @return A string containing the literal system identifier, or null 2554 * if none is available. 2555 */ 2556 public String getLiteralSystemId() { 2557 if (fCurrentEntity != null) { 2558 if (fCurrentEntity.entityLocation != null && 2559 fCurrentEntity.entityLocation.getLiteralSystemId() != null ) { 2560 return fCurrentEntity.entityLocation.getLiteralSystemId(); 2561 } else { 2562 // search for the first external entity on the stack 2563 int size = fEntityStack.size(); 2564 for (int i = size - 1; i >= 0 ; i--) { 2565 Entity.ScannedEntity externalEntity = 2566 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2567 2568 if (externalEntity.entityLocation != null && 2569 externalEntity.entityLocation.getLiteralSystemId() != null) { 2570 return externalEntity.entityLocation.getLiteralSystemId(); 2571 } 2572 } 2573 } 2574 } 2575 return null; 2576 } // getLiteralSystemId():String 2577 2578 /** 2579 * Return the line number where the current document event ends. 2580 * <p> 2581 * <strong>Warning:</strong> The return value from the method 2582 * is intended only as an approximation for the sake of error 2583 * reporting; it is not intended to provide sufficient information 2584 * to edit the character content of the original XML document. 2585 * <p> 2586 * The return value is an approximation of the line number 2587 * in the document entity or external parsed entity where the 2588 * markup triggering the event appears. 2589 * <p> 2590 * If possible, the SAX driver should provide the line position 2591 * of the first character after the text associated with the document 2592 * event. The first line in the document is line 1. 2593 * 2594 * @return The line number, or -1 if none is available. 2595 */ 2596 public int getLineNumber() { 2597 if (fCurrentEntity != null) { 2598 if (fCurrentEntity.isExternal()) { 2599 return fCurrentEntity.lineNumber; 2600 } else { 2601 // search for the first external entity on the stack 2602 int size = fEntityStack.size(); 2603 for (int i=size-1; i>0 ; i--) { 2604 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2605 if (firstExternalEntity.isExternal()) { 2606 return firstExternalEntity.lineNumber; 2607 } 2608 } 2609 } 2610 } 2611 2612 return -1; 2613 2614 } // getLineNumber():int 2615 2616 /** 2617 * Return the column number where the current document event ends. 2618 * <p> 2619 * <strong>Warning:</strong> The return value from the method 2620 * is intended only as an approximation for the sake of error 2621 * reporting; it is not intended to provide sufficient information 2622 * to edit the character content of the original XML document. 2623 * <p> 2624 * The return value is an approximation of the column number 2625 * in the document entity or external parsed entity where the 2626 * markup triggering the event appears. 2627 * <p> 2628 * If possible, the SAX driver should provide the line position 2629 * of the first character after the text associated with the document 2630 * event. 2631 * <p> 2632 * If possible, the SAX driver should provide the line position 2633 * of the first character after the text associated with the document 2634 * event. The first column in each line is column 1. 2635 * 2636 * @return The column number, or -1 if none is available. 2637 */ 2638 public int getColumnNumber() { 2639 if (fCurrentEntity != null) { 2640 if (fCurrentEntity.isExternal()) { 2641 return fCurrentEntity.columnNumber; 2642 } else { 2643 // search for the first external entity on the stack 2644 int size = fEntityStack.size(); 2645 for (int i=size-1; i>0 ; i--) { 2646 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2647 if (firstExternalEntity.isExternal()) { 2648 return firstExternalEntity.columnNumber; 2649 } 2650 } 2651 } 2652 } 2653 2654 return -1; 2655 } // getColumnNumber():int 2656 2657 2658 // 2659 // Protected static methods 2660 // 2661 2662 /** 2663 * Fixes a platform dependent filename to standard URI form. 2664 * 2665 * @param str The string to fix. 2666 * 2667 * @return Returns the fixed URI string. 2668 */ 2669 protected static String fixURI(String str) { 2670 2671 // handle platform dependent strings 2672 str = str.replace(java.io.File.separatorChar, '/'); 2673 2674 // Windows fix 2675 if (str.length() >= 2) { 2676 char ch1 = str.charAt(1); 2677 // change "C:blah" to "/C:blah" 2678 if (ch1 == ':') { 2679 char ch0 = Character.toUpperCase(str.charAt(0)); 2680 if (ch0 >= 'A' && ch0 <= 'Z') { 2681 str = "/" + str; 2682 } 2683 } 2684 // change "//blah" to "file://blah" 2685 else if (ch1 == '/' && str.charAt(0) == '/') { 2686 str = "file:" + str; 2687 } 2688 } 2689 2690 // replace spaces in file names with %20. 2691 // Original comment from JDK5: the following algorithm might not be 2692 // very performant, but people who want to use invalid URI's have to 2693 // pay the price. 2694 int pos = str.indexOf(' '); 2695 if (pos >= 0) { 2696 StringBuilder sb = new StringBuilder(str.length()); 2697 // put characters before ' ' into the string builder 2698 for (int i = 0; i < pos; i++) 2699 sb.append(str.charAt(i)); 2700 // and %20 for the space 2701 sb.append("%20"); 2702 // for the remamining part, also convert ' ' to "%20". 2703 for (int i = pos+1; i < str.length(); i++) { 2704 if (str.charAt(i) == ' ') 2705 sb.append("%20"); 2706 else 2707 sb.append(str.charAt(i)); 2708 } 2709 str = sb.toString(); 2710 } 2711 2712 // done 2713 return str; 2714 2715 } // fixURI(String):String 2716 2717 2718 // 2719 // Package visible methods 2720 // 2721 /** Prints the contents of the buffer. */ 2722 final void print() { 2723 if (DEBUG_BUFFER) { 2724 if (fCurrentEntity != null) { 2725 System.out.print('['); 2726 System.out.print(fCurrentEntity.count); 2727 System.out.print(' '); 2728 System.out.print(fCurrentEntity.position); 2729 if (fCurrentEntity.count > 0) { 2730 System.out.print(" \""); 2731 for (int i = 0; i < fCurrentEntity.count; i++) { 2732 if (i == fCurrentEntity.position) { 2733 System.out.print('^'); 2734 } 2735 char c = fCurrentEntity.ch[i]; 2736 switch (c) { 2737 case '\n': { 2738 System.out.print("\\n"); 2739 break; 2740 } 2741 case '\r': { 2742 System.out.print("\\r"); 2743 break; 2744 } 2745 case '\t': { 2746 System.out.print("\\t"); 2747 break; 2748 } 2749 case '\\': { 2750 System.out.print("\\\\"); 2751 break; 2752 } 2753 default: { 2754 System.out.print(c); 2755 } 2756 } 2757 } 2758 if (fCurrentEntity.position == fCurrentEntity.count) { 2759 System.out.print('^'); 2760 } 2761 System.out.print('"'); 2762 } 2763 System.out.print(']'); 2764 System.out.print(" @ "); 2765 System.out.print(fCurrentEntity.lineNumber); 2766 System.out.print(','); 2767 System.out.print(fCurrentEntity.columnNumber); 2768 } else { 2769 System.out.print("*NO CURRENT ENTITY*"); 2770 } 2771 } 2772 } // print() 2773 2774 /** 2775 * Buffer used in entity manager to reuse character arrays instead 2776 * of creating new ones every time. 2777 * 2778 * @xerces.internal 2779 * 2780 * @author Ankit Pasricha, IBM 2781 */ 2782 private static class CharacterBuffer { 2783 2784 /** character buffer */ 2785 private char[] ch; 2786 2787 /** whether the buffer is for an external or internal scanned entity */ 2788 private boolean isExternal; 2789 2790 public CharacterBuffer(boolean isExternal, int size) { 2791 this.isExternal = isExternal; 2792 ch = new char[size]; 2793 } 2794 } 2795 2796 2797 /** 2798 * Stores a number of character buffers and provides it to the entity 2799 * manager to use when an entity is seen. 2800 * 2801 * @xerces.internal 2802 * 2803 * @author Ankit Pasricha, IBM 2804 */ 2805 private static class CharacterBufferPool { 2806 2807 private static final int DEFAULT_POOL_SIZE = 3; 2808 2809 private CharacterBuffer[] fInternalBufferPool; 2810 private CharacterBuffer[] fExternalBufferPool; 2811 2812 private int fExternalBufferSize; 2813 private int fInternalBufferSize; 2814 private int poolSize; 2815 2816 private int fInternalTop; 2817 private int fExternalTop; 2818 2819 public CharacterBufferPool(int externalBufferSize, int internalBufferSize) { 2820 this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize); 2821 } 2822 2823 public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) { 2824 fExternalBufferSize = externalBufferSize; 2825 fInternalBufferSize = internalBufferSize; 2826 this.poolSize = poolSize; 2827 init(); 2828 } 2829 2830 /** Initializes buffer pool. **/ 2831 private void init() { 2832 fInternalBufferPool = new CharacterBuffer[poolSize]; 2833 fExternalBufferPool = new CharacterBuffer[poolSize]; 2834 fInternalTop = -1; 2835 fExternalTop = -1; 2836 } 2837 2838 /** Retrieves buffer from pool. **/ 2839 public CharacterBuffer getBuffer(boolean external) { 2840 if (external) { 2841 if (fExternalTop > -1) { 2842 return (CharacterBuffer)fExternalBufferPool[fExternalTop--]; 2843 } 2844 else { 2845 return new CharacterBuffer(true, fExternalBufferSize); 2846 } 2847 } 2848 else { 2849 if (fInternalTop > -1) { 2850 return (CharacterBuffer)fInternalBufferPool[fInternalTop--]; 2851 } 2852 else { 2853 return new CharacterBuffer(false, fInternalBufferSize); 2854 } 2855 } 2856 } 2857 2858 /** Returns buffer to pool. **/ 2859 public void returnToPool(CharacterBuffer buffer) { 2860 if (buffer.isExternal) { 2861 if (fExternalTop < fExternalBufferPool.length - 1) { 2862 fExternalBufferPool[++fExternalTop] = buffer; 2863 } 2864 } 2865 else if (fInternalTop < fInternalBufferPool.length - 1) { 2866 fInternalBufferPool[++fInternalTop] = buffer; 2867 } 2868 } 2869 2870 /** Sets the size of external buffers and dumps the old pool. **/ 2871 public void setExternalBufferSize(int bufferSize) { 2872 fExternalBufferSize = bufferSize; 2873 fExternalBufferPool = new CharacterBuffer[poolSize]; 2874 fExternalTop = -1; 2875 } 2876 } 2877 2878 /** 2879 * This class wraps the byte inputstreams we're presented with. 2880 * We need it because java.io.InputStreams don't provide 2881 * functionality to reread processed bytes, and they have a habit 2882 * of reading more than one character when you call their read() 2883 * methods. This means that, once we discover the true (declared) 2884 * encoding of a document, we can neither backtrack to read the 2885 * whole doc again nor start reading where we are with a new 2886 * reader. 2887 * 2888 * This class allows rewinding an inputStream by allowing a mark 2889 * to be set, and the stream reset to that position. <strong>The 2890 * class assumes that it needs to read one character per 2891 * invocation when it's read() method is inovked, but uses the 2892 * underlying InputStream's read(char[], offset length) method--it 2893 * won't buffer data read this way!</strong> 2894 * 2895 * @xerces.internal 2896 * 2897 * @author Neil Graham, IBM 2898 * @author Glenn Marcy, IBM 2899 */ 2900 2901 protected final class RewindableInputStream extends InputStream { 2902 2903 private InputStream fInputStream; 2904 private byte[] fData; 2905 private int fStartOffset; 2906 private int fEndOffset; 2907 private int fOffset; 2908 private int fLength; 2909 private int fMark; 2910 2911 public RewindableInputStream(InputStream is) { 2912 fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE]; 2913 fInputStream = is; 2914 fStartOffset = 0; 2915 fEndOffset = -1; 2916 fOffset = 0; 2917 fLength = 0; 2918 fMark = 0; 2919 } 2920 2921 public void setStartOffset(int offset) { 2922 fStartOffset = offset; 2923 } 2924 2925 public void rewind() { 2926 fOffset = fStartOffset; 2927 } 2928 2929 public int read() throws IOException { 2930 int b = 0; 2931 if (fOffset < fLength) { 2932 return fData[fOffset++] & 0xff; 2933 } 2934 if (fOffset == fEndOffset) { 2935 return -1; 2936 } 2937 if (fOffset == fData.length) { 2938 byte[] newData = new byte[fOffset << 1]; 2939 System.arraycopy(fData, 0, newData, 0, fOffset); 2940 fData = newData; 2941 } 2942 b = fInputStream.read(); 2943 if (b == -1) { 2944 fEndOffset = fOffset; 2945 return -1; 2946 } 2947 fData[fLength++] = (byte)b; 2948 fOffset++; 2949 return b & 0xff; 2950 } 2951 2952 public int read(byte[] b, int off, int len) throws IOException { 2953 int bytesLeft = fLength - fOffset; 2954 if (bytesLeft == 0) { 2955 if (fOffset == fEndOffset) { 2956 return -1; 2957 } 2958 2959 /** 2960 * //System.out.println("fCurrentEntitty = " + fCurrentEntity ); 2961 * //System.out.println("fInputStream = " + fInputStream ); 2962 * // better get some more for the voracious reader... */ 2963 2964 if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) { 2965 2966 if (!fCurrentEntity.xmlDeclChunkRead) 2967 { 2968 fCurrentEntity.xmlDeclChunkRead = true; 2969 len = fCurrentEntity.DEFAULT_XMLDECL_BUFFER_SIZE; 2970 } 2971 return fInputStream.read(b, off, len); 2972 } 2973 2974 int returnedVal = read(); 2975 if(returnedVal == -1) { 2976 fEndOffset = fOffset; 2977 return -1; 2978 } 2979 b[off] = (byte)returnedVal; 2980 return 1; 2981 2982 } 2983 if (len < bytesLeft) { 2984 if (len <= 0) { 2985 return 0; 2986 } 2987 } else { 2988 len = bytesLeft; 2989 } 2990 if (b != null) { 2991 System.arraycopy(fData, fOffset, b, off, len); 2992 } 2993 fOffset += len; 2994 return len; 2995 } 2996 2997 public long skip(long n) 2998 throws IOException { 2999 int bytesLeft; 3000 if (n <= 0) { 3001 return 0; 3002 } 3003 bytesLeft = fLength - fOffset; 3004 if (bytesLeft == 0) { 3005 if (fOffset == fEndOffset) { 3006 return 0; 3007 } 3008 return fInputStream.skip(n); 3009 } 3010 if (n <= bytesLeft) { 3011 fOffset += n; 3012 return n; 3013 } 3014 fOffset += bytesLeft; 3015 if (fOffset == fEndOffset) { 3016 return bytesLeft; 3017 } 3018 n -= bytesLeft; 3019 /* 3020 * In a manner of speaking, when this class isn't permitting more 3021 * than one byte at a time to be read, it is "blocking". The 3022 * available() method should indicate how much can be read without 3023 * blocking, so while we're in this mode, it should only indicate 3024 * that bytes in its buffer are available; otherwise, the result of 3025 * available() on the underlying InputStream is appropriate. 3026 */ 3027 return fInputStream.skip(n) + bytesLeft; 3028 } 3029 3030 public int available() throws IOException { 3031 int bytesLeft = fLength - fOffset; 3032 if (bytesLeft == 0) { 3033 if (fOffset == fEndOffset) { 3034 return -1; 3035 } 3036 return fCurrentEntity.mayReadChunks ? fInputStream.available() 3037 : 0; 3038 } 3039 return bytesLeft; 3040 } 3041 3042 public void mark(int howMuch) { 3043 fMark = fOffset; 3044 } 3045 3046 public void reset() { 3047 fOffset = fMark; 3048 //test(); 3049 } 3050 3051 public boolean markSupported() { 3052 return true; 3053 } 3054 3055 public void close() throws IOException { 3056 if (fInputStream != null) { 3057 fInputStream.close(); 3058 fInputStream = null; 3059 } 3060 } 3061 } // end of RewindableInputStream class 3062 3063 public void test(){ 3064 //System.out.println("TESTING: Added familytree to entityManager"); 3065 //Usecase1 3066 fEntityStorage.addExternalEntity("entityUsecase1",null, 3067 "/space/home/stax/sun/6thJan2004/zephyr/data/test.txt", 3068 "/space/home/stax/sun/6thJan2004/zephyr/data/entity.xml"); 3069 3070 //Usecase2 3071 fEntityStorage.addInternalEntity("entityUsecase2","<Test>value</Test>"); 3072 fEntityStorage.addInternalEntity("entityUsecase3","value3"); 3073 fEntityStorage.addInternalEntity("text", "Hello World."); 3074 fEntityStorage.addInternalEntity("empty-element", "<foo/>"); 3075 fEntityStorage.addInternalEntity("balanced-element", "<foo></foo>"); 3076 fEntityStorage.addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 3077 fEntityStorage.addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 3078 fEntityStorage.addInternalEntity("unbalanced-entity", "<foo>"); 3079 fEntityStorage.addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 3080 fEntityStorage.addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 3081 fEntityStorage.addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 3082 fEntityStorage.addInternalEntity("ch","©"); 3083 fEntityStorage.addInternalEntity("ch1","T"); 3084 fEntityStorage.addInternalEntity("% ch2","param"); 3085 } 3086 3087 } // class XMLEntityManager