1 /* 2 * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl ; 22 23 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 24 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 25 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 26 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 27 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 28 import com.sun.org.apache.xerces.internal.util.*; 29 import com.sun.org.apache.xerces.internal.util.URI; 30 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 31 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 32 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 33 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 34 import com.sun.org.apache.xerces.internal.xni.Augmentations; 35 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 36 import com.sun.org.apache.xerces.internal.xni.XNIException; 37 import com.sun.org.apache.xerces.internal.xni.parser.*; 38 import 
com.sun.xml.internal.stream.Entity; 39 import com.sun.xml.internal.stream.StaxEntityResolverWrapper; 40 import com.sun.xml.internal.stream.StaxXMLInputSource; 41 import com.sun.xml.internal.stream.XMLEntityStorage; 42 import java.io.*; 43 import java.lang.reflect.Method; 44 import java.net.HttpURLConnection; 45 import java.net.URISyntaxException; 46 import java.net.URL; 47 import java.net.URLConnection; 48 import java.util.HashMap; 49 import java.util.Iterator; 50 import java.util.Locale; 51 import java.util.Map; 52 import java.util.Stack; 53 import javax.xml.stream.XMLInputFactory; 54 55 56 /** 57 * Will keep track of current entity. 58 * 59 * The entity manager handles the registration of general and parameter 60 * entities; resolves entities; and starts entities. The entity manager 61 * is a central component in a standard parser configuration and this 62 * class works directly with the entity scanner to manage the underlying 63 * xni. 64 * <p> 65 * This component requires the following features and properties from the 66 * component manager that uses it: 67 * <ul> 68 * <li>http://xml.org/sax/features/validation</li> 69 * <li>http://xml.org/sax/features/external-general-entities</li> 70 * <li>http://xml.org/sax/features/external-parameter-entities</li> 71 * <li>http://apache.org/xml/features/allow-java-encodings</li> 72 * <li>http://apache.org/xml/properties/internal/symbol-table</li> 73 * <li>http://apache.org/xml/properties/internal/error-reporter</li> 74 * <li>http://apache.org/xml/properties/internal/entity-resolver</li> 75 * </ul> 76 * 77 * 78 * @author Andy Clark, IBM 79 * @author Arnaud Le Hors, IBM 80 * @author K.Venugopal SUN Microsystems 81 * @author Neeraj Bajaj SUN Microsystems 82 * @author Sunitha Reddy SUN Microsystems 83 * @version $Id: XMLEntityManager.java,v 1.17 2010-11-01 04:39:41 joehw Exp $ 84 */ 85 public class XMLEntityManager implements XMLComponent, XMLEntityResolver { 86 87 // 88 // Constants 89 // 90 91 /** Default buffer size (2048). 
*/ 92 public static final int DEFAULT_BUFFER_SIZE = 8192; 93 94 /** Default buffer size before we've finished with the XMLDecl: */ 95 public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64; 96 97 /** Default internal entity buffer size (1024). */ 98 public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 1024; 99 100 // feature identifiers 101 102 /** Feature identifier: validation. */ 103 protected static final String VALIDATION = 104 Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE; 105 106 /** 107 * standard uri conformant (strict uri). 108 * http://apache.org/xml/features/standard-uri-conformant 109 */ 110 protected boolean fStrictURI; 111 112 113 /** Feature identifier: external general entities. */ 114 protected static final String EXTERNAL_GENERAL_ENTITIES = 115 Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE; 116 117 /** Feature identifier: external parameter entities. */ 118 protected static final String EXTERNAL_PARAMETER_ENTITIES = 119 Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE; 120 121 /** Feature identifier: allow Java encodings. */ 122 protected static final String ALLOW_JAVA_ENCODINGS = 123 Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; 124 125 /** Feature identifier: warn on duplicate EntityDef */ 126 protected static final String WARN_ON_DUPLICATE_ENTITYDEF = 127 Constants.XERCES_FEATURE_PREFIX +Constants.WARN_ON_DUPLICATE_ENTITYDEF_FEATURE; 128 129 /** Feature identifier: load external DTD. */ 130 protected static final String LOAD_EXTERNAL_DTD = 131 Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE; 132 133 // property identifiers 134 135 /** Property identifier: symbol table. */ 136 protected static final String SYMBOL_TABLE = 137 Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY; 138 139 /** Property identifier: error reporter. 
*/ 140 protected static final String ERROR_REPORTER = 141 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; 142 143 /** Feature identifier: standard uri conformant */ 144 protected static final String STANDARD_URI_CONFORMANT = 145 Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE; 146 147 /** Property identifier: entity resolver. */ 148 protected static final String ENTITY_RESOLVER = 149 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; 150 151 protected static final String STAX_ENTITY_RESOLVER = 152 Constants.XERCES_PROPERTY_PREFIX + Constants.STAX_ENTITY_RESOLVER_PROPERTY; 153 154 // property identifier: ValidationManager 155 protected static final String VALIDATION_MANAGER = 156 Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY; 157 158 /** property identifier: buffer size. */ 159 protected static final String BUFFER_SIZE = 160 Constants.XERCES_PROPERTY_PREFIX + Constants.BUFFER_SIZE_PROPERTY; 161 162 /** property identifier: security manager. */ 163 protected static final String SECURITY_MANAGER = 164 Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY; 165 166 protected static final String PARSER_SETTINGS = 167 Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS; 168 169 /** Property identifier: Security property manager. */ 170 private static final String XML_SECURITY_PROPERTY_MANAGER = 171 Constants.XML_SECURITY_PROPERTY_MANAGER; 172 173 /** access external dtd: file protocol */ 174 static final String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT; 175 176 // recognized features and properties 177 178 /** Recognized features. */ 179 private static final String[] RECOGNIZED_FEATURES = { 180 VALIDATION, 181 EXTERNAL_GENERAL_ENTITIES, 182 EXTERNAL_PARAMETER_ENTITIES, 183 ALLOW_JAVA_ENCODINGS, 184 WARN_ON_DUPLICATE_ENTITYDEF, 185 STANDARD_URI_CONFORMANT 186 }; 187 188 /** Feature defaults. 
*/ 189 private static final Boolean[] FEATURE_DEFAULTS = { 190 null, 191 Boolean.TRUE, 192 Boolean.TRUE, 193 Boolean.TRUE, 194 Boolean.FALSE, 195 Boolean.FALSE 196 }; 197 198 /** Recognized properties. */ 199 private static final String[] RECOGNIZED_PROPERTIES = { 200 SYMBOL_TABLE, 201 ERROR_REPORTER, 202 ENTITY_RESOLVER, 203 VALIDATION_MANAGER, 204 BUFFER_SIZE, 205 SECURITY_MANAGER, 206 XML_SECURITY_PROPERTY_MANAGER 207 }; 208 209 /** Property defaults. */ 210 private static final Object[] PROPERTY_DEFAULTS = { 211 null, 212 null, 213 null, 214 null, 215 new Integer(DEFAULT_BUFFER_SIZE), 216 null, 217 null 218 }; 219 220 private static final String XMLEntity = "[xml]".intern(); 221 private static final String DTDEntity = "[dtd]".intern(); 222 223 // debugging 224 225 /** 226 * Debug printing of buffer. This debugging flag works best when you 227 * resize the DEFAULT_BUFFER_SIZE down to something reasonable like 228 * 64 characters. 229 */ 230 private static final boolean DEBUG_BUFFER = false; 231 232 /** warn on duplicate Entity declaration. 233 * http://apache.org/xml/features/warn-on-duplicate-entitydef 234 */ 235 protected boolean fWarnDuplicateEntityDef; 236 237 /** Debug some basic entities. */ 238 private static final boolean DEBUG_ENTITIES = false; 239 240 /** Debug switching readers for encodings. */ 241 private static final boolean DEBUG_ENCODINGS = false; 242 243 // should be diplayed trace resolving messages 244 private static final boolean DEBUG_RESOLVER = false ; 245 246 // 247 // Data 248 // 249 250 // features 251 252 /** 253 * Validation. This feature identifier is: 254 * http://xml.org/sax/features/validation 255 */ 256 protected boolean fValidation; 257 258 /** 259 * External general entities. This feature identifier is: 260 * http://xml.org/sax/features/external-general-entities 261 */ 262 protected boolean fExternalGeneralEntities; 263 264 /** 265 * External parameter entities. 
This feature identifier is: 266 * http://xml.org/sax/features/external-parameter-entities 267 */ 268 protected boolean fExternalParameterEntities; 269 270 /** 271 * Allow Java encoding names. This feature identifier is: 272 * http://apache.org/xml/features/allow-java-encodings 273 */ 274 protected boolean fAllowJavaEncodings = true ; 275 276 /** Load external DTD. */ 277 protected boolean fLoadExternalDTD = true; 278 279 // properties 280 281 /** 282 * Symbol table. This property identifier is: 283 * http://apache.org/xml/properties/internal/symbol-table 284 */ 285 protected SymbolTable fSymbolTable; 286 287 /** 288 * Error reporter. This property identifier is: 289 * http://apache.org/xml/properties/internal/error-reporter 290 */ 291 protected XMLErrorReporter fErrorReporter; 292 293 /** 294 * Entity resolver. This property identifier is: 295 * http://apache.org/xml/properties/internal/entity-resolver 296 */ 297 protected XMLEntityResolver fEntityResolver; 298 299 /** Stax Entity Resolver. This property identifier is XMLInputFactory.ENTITY_RESOLVER */ 300 301 protected StaxEntityResolverWrapper fStaxEntityResolver; 302 303 /** Property Manager. This is used from Stax */ 304 protected PropertyManager fPropertyManager ; 305 306 /** StAX properties */ 307 boolean fSupportDTD = true; 308 boolean fReplaceEntityReferences = true; 309 boolean fSupportExternalEntities = true; 310 311 /** used to restrict external access */ 312 protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT; 313 314 // settings 315 316 /** 317 * Validation manager. This property identifier is: 318 * http://apache.org/xml/properties/internal/validation-manager 319 */ 320 protected ValidationManager fValidationManager; 321 322 // settings 323 324 /** 325 * Buffer size. We get this value from a property. The default size 326 * is used if the input buffer size property is not specified. 327 * REVISIT: do we need a property for internal entity buffer size? 
328 */ 329 protected int fBufferSize = DEFAULT_BUFFER_SIZE; 330 331 /** Security Manager */ 332 protected XMLSecurityManager fSecurityManager = null; 333 334 protected XMLLimitAnalyzer fLimitAnalyzer = null; 335 336 protected int entityExpansionIndex; 337 338 /** 339 * True if the document entity is standalone. This should really 340 * only be set by the document source (e.g. XMLDocumentScanner). 341 */ 342 protected boolean fStandalone; 343 344 // are the entities being parsed in the external subset? 345 // NOTE: this *is not* the same as whether they're external entities! 346 protected boolean fInExternalSubset = false; 347 348 349 // handlers 350 /** Entity handler. */ 351 protected XMLEntityHandler fEntityHandler; 352 353 /** Current entity scanner */ 354 protected XMLEntityScanner fEntityScanner ; 355 356 /** XML 1.0 entity scanner. */ 357 protected XMLEntityScanner fXML10EntityScanner; 358 359 /** XML 1.1 entity scanner. */ 360 protected XMLEntityScanner fXML11EntityScanner; 361 362 /** count of entities expanded: */ 363 protected int fEntityExpansionCount = 0; 364 365 // entities 366 367 /** Entities. */ 368 protected Map<String, Entity> fEntities = new HashMap<>(); 369 370 /** Entity stack. */ 371 protected Stack<Entity> fEntityStack = new Stack<>(); 372 373 /** Current entity. */ 374 protected Entity.ScannedEntity fCurrentEntity = null; 375 376 /** identify if the InputSource is created by a resolver */ 377 boolean fISCreatedByResolver = false; 378 379 // shared context 380 381 protected XMLEntityStorage fEntityStorage ; 382 383 protected final Object [] defaultEncoding = new Object[]{"UTF-8", null}; 384 385 386 // temp vars 387 388 /** Resource identifer. */ 389 private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl(); 390 391 /** Augmentations for entities. */ 392 private final Augmentations fEntityAugs = new AugmentationsImpl(); 393 394 /** Pool of character buffers. 
*/ 395 private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE); 396 397 // 398 // Constructors 399 // 400 401 /** 402 * If this constructor is used to create the object, reset() should be invoked on this object 403 */ 404 public XMLEntityManager() { 405 //for entity managers not created by parsers 406 fSecurityManager = new XMLSecurityManager(true); 407 fEntityStorage = new XMLEntityStorage(this) ; 408 setScannerVersion(Constants.XML_VERSION_1_0); 409 } // <init>() 410 411 /** Default constructor. */ 412 public XMLEntityManager(PropertyManager propertyManager) { 413 fPropertyManager = propertyManager ; 414 //pass a reference to current entity being scanned 415 //fEntityStorage = new XMLEntityStorage(fCurrentEntity) ; 416 fEntityStorage = new XMLEntityStorage(this) ; 417 fEntityScanner = new XMLEntityScanner(propertyManager, this) ; 418 reset(propertyManager); 419 } // <init>() 420 421 /** 422 * Adds an internal entity declaration. 423 * <p> 424 * <strong>Note:</strong> This method ignores subsequent entity 425 * declarations. 426 * <p> 427 * <strong>Note:</strong> The name should be a unique symbol. The 428 * SymbolTable can be used for this purpose. 429 * 430 * @param name The name of the entity. 431 * @param text The text of the entity. 432 * 433 * @see SymbolTable 434 */ 435 public void addInternalEntity(String name, String text) { 436 if (!fEntities.containsKey(name)) { 437 Entity entity = new Entity.InternalEntity(name, text, fInExternalSubset); 438 fEntities.put(name, entity); 439 } else{ 440 if(fWarnDuplicateEntityDef){ 441 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 442 "MSG_DUPLICATE_ENTITY_DEFINITION", 443 new Object[]{ name }, 444 XMLErrorReporter.SEVERITY_WARNING ); 445 } 446 } 447 448 } // addInternalEntity(String,String) 449 450 /** 451 * Adds an external entity declaration. 452 * <p> 453 * <strong>Note:</strong> This method ignores subsequent entity 454 * declarations. 
455 * <p> 456 * <strong>Note:</strong> The name should be a unique symbol. The 457 * SymbolTable can be used for this purpose. 458 * 459 * @param name The name of the entity. 460 * @param publicId The public identifier of the entity. 461 * @param literalSystemId The system identifier of the entity. 462 * @param baseSystemId The base system identifier of the entity. 463 * This is the system identifier of the entity 464 * where <em>the entity being added</em> and 465 * is used to expand the system identifier when 466 * the system identifier is a relative URI. 467 * When null the system identifier of the first 468 * external entity on the stack is used instead. 469 * 470 * @see SymbolTable 471 */ 472 public void addExternalEntity(String name, 473 String publicId, String literalSystemId, 474 String baseSystemId) throws IOException { 475 if (!fEntities.containsKey(name)) { 476 if (baseSystemId == null) { 477 // search for the first external entity on the stack 478 int size = fEntityStack.size(); 479 if (size == 0 && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 480 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 481 } 482 for (int i = size - 1; i >= 0 ; i--) { 483 Entity.ScannedEntity externalEntity = 484 (Entity.ScannedEntity)fEntityStack.elementAt(i); 485 if (externalEntity.entityLocation != null && externalEntity.entityLocation.getExpandedSystemId() != null) { 486 baseSystemId = externalEntity.entityLocation.getExpandedSystemId(); 487 break; 488 } 489 } 490 } 491 Entity entity = new Entity.ExternalEntity(name, 492 new XMLEntityDescriptionImpl(name, publicId, literalSystemId, baseSystemId, 493 expandSystemId(literalSystemId, baseSystemId, false)), null, fInExternalSubset); 494 fEntities.put(name, entity); 495 } else{ 496 if(fWarnDuplicateEntityDef){ 497 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 498 "MSG_DUPLICATE_ENTITY_DEFINITION", 499 new Object[]{ name }, 500 XMLErrorReporter.SEVERITY_WARNING ); 501 } 502 } 
503 504 } // addExternalEntity(String,String,String,String) 505 506 507 /** 508 * Adds an unparsed entity declaration. 509 * <p> 510 * <strong>Note:</strong> This method ignores subsequent entity 511 * declarations. 512 * <p> 513 * <strong>Note:</strong> The name should be a unique symbol. The 514 * SymbolTable can be used for this purpose. 515 * 516 * @param name The name of the entity. 517 * @param publicId The public identifier of the entity. 518 * @param systemId The system identifier of the entity. 519 * @param notation The name of the notation. 520 * 521 * @see SymbolTable 522 */ 523 public void addUnparsedEntity(String name, 524 String publicId, String systemId, 525 String baseSystemId, String notation) { 526 if (!fEntities.containsKey(name)) { 527 Entity.ExternalEntity entity = new Entity.ExternalEntity(name, 528 new XMLEntityDescriptionImpl(name, publicId, systemId, baseSystemId, null), 529 notation, fInExternalSubset); 530 fEntities.put(name, entity); 531 } else{ 532 if(fWarnDuplicateEntityDef){ 533 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 534 "MSG_DUPLICATE_ENTITY_DEFINITION", 535 new Object[]{ name }, 536 XMLErrorReporter.SEVERITY_WARNING ); 537 } 538 } 539 } // addUnparsedEntity(String,String,String,String) 540 541 542 /** get the entity storage object from entity manager */ 543 public XMLEntityStorage getEntityStore(){ 544 return fEntityStorage ; 545 } 546 547 /** return the entity responsible for reading the entity */ 548 public XMLEntityScanner getEntityScanner(){ 549 if(fEntityScanner == null) { 550 // default to 1.0 551 if(fXML10EntityScanner == null) { 552 fXML10EntityScanner = new XMLEntityScanner(); 553 } 554 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 555 fEntityScanner = fXML10EntityScanner; 556 } 557 return fEntityScanner; 558 559 } 560 561 public void setScannerVersion(short version) { 562 563 if(version == Constants.XML_VERSION_1_0) { 564 if(fXML10EntityScanner == null) { 565 fXML10EntityScanner = new 
XMLEntityScanner(); 566 } 567 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 568 fEntityScanner = fXML10EntityScanner; 569 fEntityScanner.setCurrentEntity(fCurrentEntity); 570 } else { 571 if(fXML11EntityScanner == null) { 572 fXML11EntityScanner = new XML11EntityScanner(); 573 } 574 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 575 fEntityScanner = fXML11EntityScanner; 576 fEntityScanner.setCurrentEntity(fCurrentEntity); 577 } 578 579 } 580 581 /** 582 * This method uses the passed-in XMLInputSource to make 583 * fCurrentEntity usable for reading. 584 * 585 * @param reference flag to indicate whether the entity is an Entity Reference. 586 * @param name name of the entity (XML is it's the document entity) 587 * @param xmlInputSource the input source, with sufficient information 588 * to begin scanning characters. 589 * @param literal True if this entity is started within a 590 * literal value. 591 * @param isExternal whether this entity should be treated as an internal or external entity. 592 * @throws IOException if anything can't be read 593 * XNIException If any parser-specific goes wrong. 
/**
 * Uses the passed-in XMLInputSource to make fCurrentEntity usable for
 * reading.
 * <p>
 * When the input source carries no character stream, a byte stream is
 * taken from the source or opened from the expanded system identifier,
 * wrapped in a RewindableInputStream, and a reader is created for it after
 * inspecting the leading bytes (encoding auto-detection when no encoding
 * was given, BOM / byte-order detection when one was). The previous
 * current entity, if any, is pushed on the entity stack and a new
 * Entity.ScannedEntity becomes the current entity of this manager and of
 * the current entity scanner.
 *
 * @param reference flag to indicate whether the entity is an Entity Reference;
 *                  passed through to the new ScannedEntity.
 * @param name name of the entity (XML if it's the document entity)
 * @param xmlInputSource the input source, with sufficient information
 *      to begin scanning characters.
 * @param literal True if this entity is started within a
 *      literal value.
 * @param isExternal whether this entity should be treated as an internal or external entity.
 * @throws IOException if anything can't be read
 * @throws XNIException If anything parser-specific goes wrong.
 * @return the encoding recorded for the new entity: the (upper-cased)
 *      encoding of the input source or the auto-detected one when a byte
 *      stream was read; when a character stream was supplied, whatever
 *      the input source's getEncoding() returned, which may be null.
 */
public String setupCurrentEntity(boolean reference, String name, XMLInputSource xmlInputSource,
        boolean literal, boolean isExternal)
        throws IOException, XNIException {
    // get information

    final String publicId = xmlInputSource.getPublicId();
    String literalSystemId = xmlInputSource.getSystemId();
    String baseSystemId = xmlInputSource.getBaseSystemId();
    String encoding = xmlInputSource.getEncoding();
    // remembered before auto-detection below can assign 'encoding'
    final boolean encodingExternallySpecified = (encoding != null);
    Boolean isBigEndian = null;

    // create reader
    InputStream stream = null;
    Reader reader = xmlInputSource.getCharacterStream();

    // First chance checking strict URI
    String expandedSystemId = expandSystemId(literalSystemId, baseSystemId, fStrictURI);
    if (baseSystemId == null) {
        baseSystemId = expandedSystemId;
    }
    // Everything in this block is skipped when the source supplied a
    // character stream; 'stream' then stays null.
    if (reader == null) {
        stream = xmlInputSource.getByteStream();
        if (stream == null) {
            // no byte stream either: open the expanded system id as a URL
            URL location = new URL(expandedSystemId);
            URLConnection connect = location.openConnection();
            if (!(connect instanceof HttpURLConnection)) {
                stream = connect.getInputStream();
            }
            else {
                boolean followRedirects = true;

                // setup URLConnection if we have an HTTPInputSource
                if (xmlInputSource instanceof HTTPInputSource) {
                    final HttpURLConnection urlConnection = (HttpURLConnection) connect;
                    final HTTPInputSource httpInputSource = (HTTPInputSource) xmlInputSource;

                    // set request properties
                    Iterator<Map.Entry<String, String>> propIter = httpInputSource.getHTTPRequestProperties();
                    while (propIter.hasNext()) {
                        Map.Entry<String, String> entry = propIter.next();
                        urlConnection.setRequestProperty(entry.getKey(), entry.getValue());
                    }

                    // set preference for redirection
                    followRedirects = httpInputSource.getFollowHTTPRedirects();
                    if (!followRedirects) {
                        setInstanceFollowRedirects(urlConnection, followRedirects);
                    }
                }

                stream = connect.getInputStream();

                // REVISIT: If the URLConnection has external encoding
                //          information, we should be reading it here. It's located
                //          in the charset parameter of Content-Type. -- mrglavas

                if (followRedirects) {
                    String redirect = connect.getURL().toString();
                    // E43: Check if the URL was redirected, and then
                    // update literal and expanded system IDs if needed.
                    if (!redirect.equals(expandedSystemId)) {
                        literalSystemId = redirect;
                        expandedSystemId = redirect;
                    }
                }
            }
        }

        // wrap this stream in RewindableInputStream so the bytes inspected
        // below can be replayed with reset()
        stream = new RewindableInputStream(stream);

        // perform auto-detect of encoding if necessary
        if (encoding == null) {
            // read first four bytes and determine encoding
            final byte[] b4 = new byte[4];
            int count = 0;
            // NOTE(review): unlike the detection loops in the
            // explicit-encoding branches below, this loop does not stop
            // when read() returns -1; the -1 is narrowed to the byte 0xFF
            // and stored. As written, count is therefore always 4 after
            // the loop and the else branch below is never taken.
            for (; count<4; count++ ) {
                b4[count] = (byte)stream.read();
            }
            if (count == 4) {
                // getEncodingName is defined outside this view; element 0
                // is used as the encoding name and element 1 as the
                // byte-order flag.
                Object [] encodingDesc = getEncodingName(b4, count);
                encoding = (String)(encodingDesc[0]);
                isBigEndian = (Boolean)(encodingDesc[1]);

                stream.reset();
                // Special case UTF-8 files with BOM created by Microsoft
                // tools. It's more efficient to consume the BOM than make
                // the reader perform extra checks. -Ac
                if (count > 2 && encoding.equals("UTF-8")) {
                    int b0 = b4[0] & 0xFF;
                    int b1 = b4[1] & 0xFF;
                    int b2 = b4[2] & 0xFF;
                    if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
                        // ignore first three bytes...
                        stream.skip(3);
                    }
                }
                reader = createReader(stream, encoding, isBigEndian);
            } else {
                // NOTE(review): this path would pass a null encoding and
                // does not reset the stream; see the note on the loop above.
                reader = createReader(stream, encoding, isBigEndian);
            }
        }

        // use specified encoding
        else {
            encoding = encoding.toUpperCase(Locale.ENGLISH);

            // If encoding is UTF-8, consume BOM if one is present.
            if (encoding.equals("UTF-8")) {
                final int[] b3 = new int[3];
                int count = 0;
                for (; count < 3; ++count) {
                    b3[count] = stream.read();
                    if (b3[count] == -1)
                        break;
                }
                if (count == 3) {
                    if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
                        // First three bytes are not BOM, so reset.
                        stream.reset();
                    }
                    // otherwise the three BOM bytes stay consumed
                } else {
                    // fewer than three bytes available: replay them
                    stream.reset();
                }
            }
            // If encoding is UTF-16, we still need to read the first four bytes
            // in order to discover the byte order.
            else if (encoding.equals("UTF-16")) {
                final int[] b4 = new int[4];
                int count = 0;
                for (; count < 4; ++count) {
                    b4[count] = stream.read();
                    if (b4[count] == -1)
                        break;
                }
                stream.reset();

                String utf16Encoding = "UTF-16";
                if (count >= 2) {
                    final int b0 = b4[0];
                    final int b1 = b4[1];
                    if (b0 == 0xFE && b1 == 0xFF) {
                        // UTF-16, big-endian
                        utf16Encoding = "UTF-16BE";
                        isBigEndian = Boolean.TRUE;
                    }
                    else if (b0 == 0xFF && b1 == 0xFE) {
                        // UTF-16, little-endian
                        utf16Encoding = "UTF-16LE";
                        isBigEndian = Boolean.FALSE;
                    }
                    else if (count == 4) {
                        // no BOM: look for the byte pattern of "<?"
                        final int b2 = b4[2];
                        final int b3 = b4[3];
                        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
                            // UTF-16, big-endian, no BOM
                            utf16Encoding = "UTF-16BE";
                            isBigEndian = Boolean.TRUE;
                        }
                        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
                            // UTF-16, little-endian, no BOM
                            utf16Encoding = "UTF-16LE";
                            isBigEndian = Boolean.FALSE;
                        }
                    }
                }
                // NOTE(review): the reader created here is replaced by the
                // createReader(stream, encoding, isBigEndian) call at the
                // end of this else block, which is given "UTF-16" rather
                // than utf16Encoding. Whether that matters depends on
                // createReader, which is outside this view -- confirm
                // before changing.
                reader = createReader(stream, utf16Encoding, isBigEndian);
            }
            // If encoding is UCS-4, we still need to read the first four bytes
            // in order to discover the byte order.
            else if (encoding.equals("ISO-10646-UCS-4")) {
                final int[] b4 = new int[4];
                int count = 0;
                for (; count < 4; ++count) {
                    b4[count] = stream.read();
                    if (b4[count] == -1)
                        break;
                }
                stream.reset();

                // Ignore unusual octet order for now.
                if (count == 4) {
                    // UCS-4, big endian (1234)
                    if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) {
                        isBigEndian = Boolean.TRUE;
                    }
                    // UCS-4, little endian (4321)
                    else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) {
                        isBigEndian = Boolean.FALSE;
                    }
                }
            }
            // If encoding is UCS-2, we still need to read the first four bytes
            // in order to discover the byte order.
            else if (encoding.equals("ISO-10646-UCS-2")) {
                final int[] b4 = new int[4];
                int count = 0;
                for (; count < 4; ++count) {
                    b4[count] = stream.read();
                    if (b4[count] == -1)
                        break;
                }
                stream.reset();

                if (count == 4) {
                    // UCS-2, big endian
                    if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) {
                        isBigEndian = Boolean.TRUE;
                    }
                    // UCS-2, little endian
                    else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) {
                        isBigEndian = Boolean.FALSE;
                    }
                }
            }

            // single reader creation shared by every explicit-encoding branch
            reader = createReader(stream, encoding, isBigEndian);
        }

        // Historical note: the reader used to be wrapped so that it read
        // one character at a time, to avoid jumping too far ahead and
        // converting characters from the byte stream in the wrong
        // encoding. That wrapping is no longer done.
        if (DEBUG_ENCODINGS) {
            System.out.println("$$$ no longer wrapping reader in OneCharReader");
        }
        //reader = new OneCharReader(reader);
    }

    // We've seen a new Reader.
    // Push it on the stack so we can close it later.
    // (fReaderStack is declared outside this view.)
    fReaderStack.push(reader);

    // push entity on stack
    if (fCurrentEntity != null) {
        fEntityStack.push(fCurrentEntity);
    }

    // create entity
    /* if encoding is specified externally, 'encoding' information present
     * in the prolog of the XML document is not considered. Hence, prolog can
     * be read in Chunks of data instead of byte by byte.
     */
    fCurrentEntity = new Entity.ScannedEntity(reference, name,
            new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandedSystemId),
            stream, reader, encoding, literal, encodingExternallySpecified, isExternal);
    fCurrentEntity.setEncodingExternallySpecified(encodingExternallySpecified);
    fEntityScanner.setCurrentEntity(fCurrentEntity);
    // keep the shared resource identifier in step with the new entity
    fResourceIdentifier.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId);
    if (fLimitAnalyzer != null) {
        fLimitAnalyzer.startEntity(name);
    }
    return encoding;
} //setupCurrentEntity(boolean, String, XMLInputSource, boolean, boolean): String


/**
 * Checks whether an entity given by name is external.
 *
 * @param entityName The name of the entity to check.
 * @return True if the entity is external, false otherwise
 *           (including when the entity is not declared).
 */
public boolean isExternalEntity(String entityName) {

    // a name with no entry in fEntities counts as "not external"
    Entity entity = fEntities.get(entityName);
    if (entity == null) {
        return false;
    }
    return entity.isExternal();
}
875 */ 876 public boolean isEntityDeclInExternalSubset(String entityName) { 877 878 Entity entity = fEntities.get(entityName); 879 if (entity == null) { 880 return false; 881 } 882 return entity.isEntityDeclInExternalSubset(); 883 } 884 885 886 887 // 888 // Public methods 889 // 890 891 /** 892 * Sets whether the document entity is standalone. 893 * 894 * @param standalone True if document entity is standalone. 895 */ 896 public void setStandalone(boolean standalone) { 897 fStandalone = standalone; 898 } 899 // setStandalone(boolean) 900 901 /** Returns true if the document entity is standalone. */ 902 public boolean isStandalone() { 903 return fStandalone; 904 } //isStandalone():boolean 905 906 public boolean isDeclaredEntity(String entityName) { 907 908 Entity entity = fEntities.get(entityName); 909 return entity != null; 910 } 911 912 public boolean isUnparsedEntity(String entityName) { 913 914 Entity entity = fEntities.get(entityName); 915 if (entity == null) { 916 return false; 917 } 918 return entity.isUnparsed(); 919 } 920 921 922 923 // this simply returns the fResourceIdentifier object; 924 // this should only be used with caution by callers that 925 // carefully manage the entity manager's behaviour, so that 926 // this doesn't returning meaningless or misleading data. 927 // @return a reference to the current fResourceIdentifier object 928 public XMLResourceIdentifier getCurrentResourceIdentifier() { 929 return fResourceIdentifier; 930 } 931 932 /** 933 * Sets the entity handler. When an entity starts and ends, the 934 * entity handler is notified of the change. 935 * 936 * @param entityHandler The new entity handler. 
937 */ 938 939 public void setEntityHandler(com.sun.org.apache.xerces.internal.impl.XMLEntityHandler entityHandler) { 940 fEntityHandler = (XMLEntityHandler) entityHandler; 941 } // setEntityHandler(XMLEntityHandler) 942 943 //this function returns StaxXMLInputSource 944 public StaxXMLInputSource resolveEntityAsPerStax(XMLResourceIdentifier resourceIdentifier) throws java.io.IOException{ 945 946 if(resourceIdentifier == null ) return null; 947 948 String publicId = resourceIdentifier.getPublicId(); 949 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 950 String baseSystemId = resourceIdentifier.getBaseSystemId(); 951 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 952 // if no base systemId given, assume that it's relative 953 // to the systemId of the current scanned entity 954 // Sometimes the system id is not (properly) expanded. 955 // We need to expand the system id if: 956 // a. the expanded one was null; or 957 // b. the base system id was null, but becomes non-null from the current entity. 958 boolean needExpand = (expandedSystemId == null); 959 // REVISIT: why would the baseSystemId ever be null? if we 960 // didn't have to make this check we wouldn't have to reuse the 961 // fXMLResourceIdentifier object... 
962 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 963 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 964 if (baseSystemId != null) 965 needExpand = true; 966 } 967 if (needExpand) 968 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 969 970 // give the entity resolver a chance 971 StaxXMLInputSource staxInputSource = null; 972 XMLInputSource xmlInputSource = null; 973 974 XMLResourceIdentifierImpl ri = null; 975 976 if (resourceIdentifier instanceof XMLResourceIdentifierImpl) { 977 ri = (XMLResourceIdentifierImpl)resourceIdentifier; 978 } else { 979 fResourceIdentifier.clear(); 980 ri = fResourceIdentifier; 981 } 982 ri.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); 983 if(DEBUG_RESOLVER){ 984 System.out.println("BEFORE Calling resolveEntity") ; 985 } 986 987 fISCreatedByResolver = false; 988 //either of Stax or Xerces would be null 989 if(fStaxEntityResolver != null){ 990 staxInputSource = fStaxEntityResolver.resolveEntity(ri); 991 if(staxInputSource != null) { 992 fISCreatedByResolver = true; 993 } 994 } 995 996 if(fEntityResolver != null){ 997 xmlInputSource = fEntityResolver.resolveEntity(ri); 998 if(xmlInputSource != null) { 999 fISCreatedByResolver = true; 1000 } 1001 } 1002 1003 if(xmlInputSource != null){ 1004 //wrap this XMLInputSource to StaxInputSource 1005 staxInputSource = new StaxXMLInputSource(xmlInputSource, fISCreatedByResolver); 1006 } 1007 1008 // do default resolution 1009 //this works for both stax & Xerces, if staxInputSource is null, it means parser need to revert to default resolution 1010 if (staxInputSource == null) { 1011 // REVISIT: when systemId is null, I think we should return null. 1012 // is this the right solution? 
-SG 1013 //if (systemId != null) 1014 staxInputSource = new StaxXMLInputSource(new XMLInputSource(publicId, literalSystemId, baseSystemId)); 1015 }else if(staxInputSource.hasXMLStreamOrXMLEventReader()){ 1016 //Waiting for the clarification from EG. - nb 1017 } 1018 1019 if (DEBUG_RESOLVER) { 1020 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1021 System.err.println(" = " + xmlInputSource); 1022 } 1023 1024 return staxInputSource; 1025 1026 } 1027 1028 /** 1029 * Resolves the specified public and system identifiers. This 1030 * method first attempts to resolve the entity based on the 1031 * EntityResolver registered by the application. If no entity 1032 * resolver is registered or if the registered entity handler 1033 * is unable to resolve the entity, then default entity 1034 * resolution will occur. 1035 * 1036 * @param publicId The public identifier of the entity. 1037 * @param systemId The system identifier of the entity. 1038 * @param baseSystemId The base system identifier of the entity. 1039 * This is the system identifier of the current 1040 * entity and is used to expand the system 1041 * identifier when the system identifier is a 1042 * relative URI. 1043 * 1044 * @return Returns an input source that wraps the resolved entity. 1045 * This method will never return null. 1046 * 1047 * @throws IOException Thrown on i/o error. 1048 * @throws XNIException Thrown by entity resolver to signal an error. 
1049 */ 1050 public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier) throws IOException, XNIException { 1051 if(resourceIdentifier == null ) return null; 1052 String publicId = resourceIdentifier.getPublicId(); 1053 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 1054 String baseSystemId = resourceIdentifier.getBaseSystemId(); 1055 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 1056 1057 // if no base systemId given, assume that it's relative 1058 // to the systemId of the current scanned entity 1059 // Sometimes the system id is not (properly) expanded. 1060 // We need to expand the system id if: 1061 // a. the expanded one was null; or 1062 // b. the base system id was null, but becomes non-null from the current entity. 1063 boolean needExpand = (expandedSystemId == null); 1064 // REVISIT: why would the baseSystemId ever be null? if we 1065 // didn't have to make this check we wouldn't have to reuse the 1066 // fXMLResourceIdentifier object... 1067 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 1068 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 1069 if (baseSystemId != null) 1070 needExpand = true; 1071 } 1072 if (needExpand) 1073 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 1074 1075 // give the entity resolver a chance 1076 XMLInputSource xmlInputSource = null; 1077 1078 if (fEntityResolver != null) { 1079 resourceIdentifier.setBaseSystemId(baseSystemId); 1080 resourceIdentifier.setExpandedSystemId(expandedSystemId); 1081 xmlInputSource = fEntityResolver.resolveEntity(resourceIdentifier); 1082 } 1083 1084 // do default resolution 1085 // REVISIT: what's the correct behavior if the user provided an entity 1086 // resolver (fEntityResolver != null), but resolveEntity doesn't return 1087 // an input source (xmlInputSource == null)? 1088 // do we do default resolution, or do we just return null? 
-SG 1089 if (xmlInputSource == null) { 1090 // REVISIT: when systemId is null, I think we should return null. 1091 // is this the right solution? -SG 1092 //if (systemId != null) 1093 xmlInputSource = new XMLInputSource(publicId, literalSystemId, baseSystemId); 1094 } 1095 1096 if (DEBUG_RESOLVER) { 1097 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1098 System.err.println(" = " + xmlInputSource); 1099 } 1100 1101 return xmlInputSource; 1102 1103 } // resolveEntity(XMLResourceIdentifier):XMLInputSource 1104 1105 /** 1106 * Starts a named entity. 1107 * 1108 * @param isGE flag to indicate whether the entity is a General Entity 1109 * @param entityName The name of the entity to start. 1110 * @param literal True if this entity is started within a literal 1111 * value. 1112 * 1113 * @throws IOException Thrown on i/o error. 1114 * @throws XNIException Thrown by entity handler to signal an error. 1115 */ 1116 public void startEntity(boolean isGE, String entityName, boolean literal) 1117 throws IOException, XNIException { 1118 1119 // was entity declared? 1120 Entity entity = fEntityStorage.getEntity(entityName); 1121 if (entity == null) { 1122 if (fEntityHandler != null) { 1123 String encoding = null; 1124 fResourceIdentifier.clear(); 1125 fEntityAugs.removeAllItems(); 1126 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1127 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1128 fEntityAugs.removeAllItems(); 1129 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1130 fEntityHandler.endEntity(entityName, fEntityAugs); 1131 } 1132 return; 1133 } 1134 1135 // should we skip external entities? 
1136 boolean external = entity.isExternal(); 1137 Entity.ExternalEntity externalEntity = null; 1138 String extLitSysId = null, extBaseSysId = null, expandedSystemId = null; 1139 if (external) { 1140 externalEntity = (Entity.ExternalEntity)entity; 1141 extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null); 1142 extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); 1143 expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); 1144 boolean unparsed = entity.isUnparsed(); 1145 boolean parameter = entityName.startsWith("%"); 1146 boolean general = !parameter; 1147 if (unparsed || (general && !fExternalGeneralEntities) || 1148 (parameter && !fExternalParameterEntities) || 1149 !fSupportDTD || !fSupportExternalEntities) { 1150 1151 if (fEntityHandler != null) { 1152 fResourceIdentifier.clear(); 1153 final String encoding = null; 1154 fResourceIdentifier.setValues( 1155 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1156 extLitSysId, extBaseSysId, expandedSystemId); 1157 fEntityAugs.removeAllItems(); 1158 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1159 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1160 fEntityAugs.removeAllItems(); 1161 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1162 fEntityHandler.endEntity(entityName, fEntityAugs); 1163 } 1164 return; 1165 } 1166 } 1167 1168 // is entity recursive? 1169 int size = fEntityStack.size(); 1170 for (int i = size; i >= 0; i--) { 1171 Entity activeEntity = i == size 1172 ? 
fCurrentEntity 1173 : (Entity)fEntityStack.elementAt(i); 1174 if (activeEntity.name == entityName) { 1175 String path = entityName; 1176 for (int j = i + 1; j < size; j++) { 1177 activeEntity = (Entity)fEntityStack.elementAt(j); 1178 path = path + " -> " + activeEntity.name; 1179 } 1180 path = path + " -> " + fCurrentEntity.name; 1181 path = path + " -> " + entityName; 1182 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1183 "RecursiveReference", 1184 new Object[] { entityName, path }, 1185 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1186 1187 if (fEntityHandler != null) { 1188 fResourceIdentifier.clear(); 1189 final String encoding = null; 1190 if (external) { 1191 fResourceIdentifier.setValues( 1192 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1193 extLitSysId, extBaseSysId, expandedSystemId); 1194 } 1195 fEntityAugs.removeAllItems(); 1196 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1197 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1198 fEntityAugs.removeAllItems(); 1199 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1200 fEntityHandler.endEntity(entityName, fEntityAugs); 1201 } 1202 1203 return; 1204 } 1205 } 1206 1207 // resolve external entity 1208 StaxXMLInputSource staxInputSource = null; 1209 XMLInputSource xmlInputSource = null ; 1210 1211 if (external) { 1212 staxInputSource = resolveEntityAsPerStax(externalEntity.entityLocation); 1213 /** xxx: Waiting from the EG 1214 * //simply return if there was entity resolver registered and application 1215 * //returns either XMLStreamReader or XMLEventReader. 
1216 * if(staxInputSource.hasXMLStreamOrXMLEventReader()) return ; 1217 */ 1218 xmlInputSource = staxInputSource.getXMLInputSource() ; 1219 if (!fISCreatedByResolver) { 1220 //let the not-LoadExternalDTD or not-SupportDTD process to handle the situation 1221 if (fLoadExternalDTD) { 1222 String accessError = SecuritySupport.checkAccess(expandedSystemId, fAccessExternalDTD, Constants.ACCESS_EXTERNAL_ALL); 1223 if (accessError != null) { 1224 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1225 "AccessExternalEntity", 1226 new Object[] { SecuritySupport.sanitizePath(expandedSystemId), accessError }, 1227 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1228 } 1229 } 1230 } 1231 } 1232 // wrap internal entity 1233 else { 1234 Entity.InternalEntity internalEntity = (Entity.InternalEntity)entity; 1235 Reader reader = new StringReader(internalEntity.text); 1236 xmlInputSource = new XMLInputSource(null, null, null, reader, null); 1237 } 1238 1239 // start the entity 1240 startEntity(isGE, entityName, xmlInputSource, literal, external); 1241 1242 } // startEntity(String,boolean) 1243 1244 /** 1245 * Starts the document entity. The document entity has the "[xml]" 1246 * pseudo-name. 1247 * 1248 * @param xmlInputSource The input source of the document entity. 1249 * 1250 * @throws IOException Thrown on i/o error. 1251 * @throws XNIException Thrown by entity handler to signal an error. 1252 */ 1253 public void startDocumentEntity(XMLInputSource xmlInputSource) 1254 throws IOException, XNIException { 1255 startEntity(false, XMLEntity, xmlInputSource, false, true); 1256 } // startDocumentEntity(XMLInputSource) 1257 1258 //xxx these methods are not required. 1259 /** 1260 * Starts the DTD entity. The DTD entity has the "[dtd]" 1261 * pseudo-name. 1262 * 1263 * @param xmlInputSource The input source of the DTD entity. 1264 * 1265 * @throws IOException Thrown on i/o error. 1266 * @throws XNIException Thrown by entity handler to signal an error. 
1267 */ 1268 public void startDTDEntity(XMLInputSource xmlInputSource) 1269 throws IOException, XNIException { 1270 startEntity(false, DTDEntity, xmlInputSource, false, true); 1271 } // startDTDEntity(XMLInputSource) 1272 1273 // indicate start of external subset so that 1274 // location of entity decls can be tracked 1275 public void startExternalSubset() { 1276 fInExternalSubset = true; 1277 } 1278 1279 public void endExternalSubset() { 1280 fInExternalSubset = false; 1281 } 1282 1283 /** 1284 * Starts an entity. 1285 * <p> 1286 * This method can be used to insert an application defined XML 1287 * entity stream into the parsing stream. 1288 * 1289 * @param isGE flag to indicate whether the entity is a General Entity 1290 * @param name The name of the entity. 1291 * @param xmlInputSource The input source of the entity. 1292 * @param literal True if this entity is started within a 1293 * literal value. 1294 * @param isExternal whether this entity should be treated as an internal or external entity. 1295 * 1296 * @throws IOException Thrown on i/o error. 1297 * @throws XNIException Thrown by entity handler to signal an error. 1298 */ 1299 public void startEntity(boolean isGE, String name, 1300 XMLInputSource xmlInputSource, 1301 boolean literal, boolean isExternal) 1302 throws IOException, XNIException { 1303 1304 String encoding = setupCurrentEntity(isGE, name, xmlInputSource, literal, isExternal); 1305 1306 //when entity expansion limit is set by the Application, we need to 1307 //check for the entity expansion limit set by the parser, if number of entity 1308 //expansions exceeds the entity expansion limit, parser will throw fatal error. 1309 // Note that this represents the nesting level of open entities. 
1310 fEntityExpansionCount++; 1311 if(fLimitAnalyzer != null) { 1312 fLimitAnalyzer.addValue(entityExpansionIndex, name, 1); 1313 } 1314 if( fSecurityManager != null && fSecurityManager.isOverLimit(entityExpansionIndex, fLimitAnalyzer)){ 1315 fSecurityManager.debugPrint(fLimitAnalyzer); 1316 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,"EntityExpansionLimit", 1317 new Object[]{fSecurityManager.getLimitValueByIndex(entityExpansionIndex)}, 1318 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1319 // is there anything better to do than reset the counter? 1320 // at least one can envision debugging applications where this might 1321 // be useful... 1322 fEntityExpansionCount = 0; 1323 } 1324 1325 // call handler 1326 if (fEntityHandler != null) { 1327 fEntityHandler.startEntity(name, fResourceIdentifier, encoding, null); 1328 } 1329 1330 } // startEntity(String,XMLInputSource) 1331 1332 /** 1333 * Return the current entity being scanned. Current entity is SET using startEntity function. 1334 * @return Entity.ScannedEntity 1335 */ 1336 1337 public Entity.ScannedEntity getCurrentEntity(){ 1338 return fCurrentEntity ; 1339 } 1340 1341 /** 1342 * Return the top level entity handled by this manager, or null 1343 * if no entity was added. 1344 */ 1345 public Entity.ScannedEntity getTopLevelEntity() { 1346 return (Entity.ScannedEntity) 1347 (fEntityStack.empty() ? null : fEntityStack.elementAt(0)); 1348 } 1349 1350 // A stack containing all the open readers 1351 protected Stack<Reader> fReaderStack = new Stack<>(); 1352 1353 /** 1354 * Close all opened InputStreams and Readers opened by this parser. 
1355 */ 1356 public void closeReaders() { 1357 // close all readers 1358 while (!fReaderStack.isEmpty()) { 1359 try { 1360 (fReaderStack.pop()).close(); 1361 } catch (IOException e) { 1362 // ignore 1363 } 1364 } 1365 } 1366 1367 public void endEntity() throws IOException, XNIException { 1368 1369 // call handler 1370 if (DEBUG_BUFFER) { 1371 System.out.print("(endEntity: "); 1372 print(); 1373 System.out.println(); 1374 } 1375 //pop the entity from the stack 1376 Entity.ScannedEntity entity = fEntityStack.size() > 0 ? (Entity.ScannedEntity)fEntityStack.pop() : null ; 1377 1378 /** need to close the reader first since the program can end 1379 * prematurely (e.g. fEntityHandler.endEntity may throw exception) 1380 * leaving the reader open 1381 */ 1382 //close the reader 1383 if(fCurrentEntity != null){ 1384 //close the reader 1385 try{ 1386 if (fLimitAnalyzer != null) { 1387 fLimitAnalyzer.endEntity(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fCurrentEntity.name); 1388 if (fCurrentEntity.name.equals("[xml]")) { 1389 fSecurityManager.debugPrint(fLimitAnalyzer); 1390 } 1391 } 1392 fCurrentEntity.close(); 1393 }catch(IOException ex){ 1394 throw new XNIException(ex); 1395 } 1396 } 1397 1398 // REVISIT: We should never encounter underflow if the calls 1399 // to startEntity and endEntity are balanced, but guard 1400 // against the EmptyStackException for now. 
-- mrglavas 1401 if (!fReaderStack.isEmpty()) { 1402 fReaderStack.pop(); 1403 } 1404 1405 if (fEntityHandler != null) { 1406 //so this is the last opened entity, signal it to current fEntityHandler using Augmentation 1407 if(entity == null){ 1408 fEntityAugs.removeAllItems(); 1409 fEntityAugs.putItem(Constants.LAST_ENTITY, Boolean.TRUE); 1410 fEntityHandler.endEntity(fCurrentEntity.name, fEntityAugs); 1411 fEntityAugs.removeAllItems(); 1412 }else{ 1413 fEntityHandler.endEntity(fCurrentEntity.name, null); 1414 } 1415 } 1416 //check if it is a document entity 1417 boolean documentEntity = fCurrentEntity.name == XMLEntity; 1418 1419 //set popped entity as current entity 1420 fCurrentEntity = entity; 1421 fEntityScanner.setCurrentEntity(fCurrentEntity); 1422 1423 //check if there are any entity left in the stack -- if there are 1424 //no entries EOF has been reached. 1425 // throw exception when it is the last entity but it is not a document entity 1426 1427 if(fCurrentEntity == null & !documentEntity){ 1428 throw new EOFException() ; 1429 } 1430 1431 if (DEBUG_BUFFER) { 1432 System.out.print(")endEntity: "); 1433 print(); 1434 System.out.println(); 1435 } 1436 1437 } // endEntity() 1438 1439 1440 // 1441 // XMLComponent methods 1442 // 1443 public void reset(PropertyManager propertyManager){ 1444 // xerces properties 1445 fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY); 1446 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY); 1447 try { 1448 fStaxEntityResolver = (StaxEntityResolverWrapper)propertyManager.getProperty(STAX_ENTITY_RESOLVER); 1449 } catch (XMLConfigurationException e) { 1450 fStaxEntityResolver = null; 1451 } 1452 1453 fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); 1454 fReplaceEntityReferences = 
((Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES)).booleanValue(); 1455 fSupportExternalEntities = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES)).booleanValue(); 1456 1457 // Zephyr feature ignore-external-dtd is the opposite of Xerces' load-external-dtd 1458 fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue(); 1459 1460 // JAXP 1.5 feature 1461 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 1462 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1463 1464 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER); 1465 1466 fLimitAnalyzer = new XMLLimitAnalyzer(); 1467 //reset fEntityStorage 1468 fEntityStorage.reset(propertyManager); 1469 //reset XMLEntityReaderImpl 1470 fEntityScanner.reset(propertyManager); 1471 1472 // initialize state 1473 //fStandalone = false; 1474 fEntities.clear(); 1475 fEntityStack.removeAllElements(); 1476 fCurrentEntity = null; 1477 fValidation = false; 1478 fExternalGeneralEntities = true; 1479 fExternalParameterEntities = true; 1480 fAllowJavaEncodings = true ; 1481 } 1482 1483 /** 1484 * Resets the component. The component can query the component manager 1485 * about any features and properties that affect the operation of the 1486 * component. 1487 * 1488 * @param componentManager The component manager. 1489 * 1490 * @throws SAXException Thrown by component on initialization error. 1491 * For example, if a feature or property is 1492 * required for the operation of the component, the 1493 * component manager may throw a 1494 * SAXNotRecognizedException or a 1495 * SAXNotSupportedException. 
1496 */ 1497 public void reset(XMLComponentManager componentManager) 1498 throws XMLConfigurationException { 1499 1500 boolean parser_settings = componentManager.getFeature(PARSER_SETTINGS, true); 1501 1502 if (!parser_settings) { 1503 // parser settings have not been changed 1504 reset(); 1505 if(fEntityScanner != null){ 1506 fEntityScanner.reset(componentManager); 1507 } 1508 if(fEntityStorage != null){ 1509 fEntityStorage.reset(componentManager); 1510 } 1511 return; 1512 } 1513 1514 // sax features 1515 fValidation = componentManager.getFeature(VALIDATION, false); 1516 fExternalGeneralEntities = componentManager.getFeature(EXTERNAL_GENERAL_ENTITIES, true); 1517 fExternalParameterEntities = componentManager.getFeature(EXTERNAL_PARAMETER_ENTITIES, true); 1518 1519 // xerces features 1520 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 1521 fWarnDuplicateEntityDef = componentManager.getFeature(WARN_ON_DUPLICATE_ENTITYDEF, false); 1522 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 1523 fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true); 1524 1525 // xerces properties 1526 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 1527 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 1528 fEntityResolver = (XMLEntityResolver)componentManager.getProperty(ENTITY_RESOLVER, null); 1529 fStaxEntityResolver = (StaxEntityResolverWrapper)componentManager.getProperty(STAX_ENTITY_RESOLVER, null); 1530 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 1531 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER, null); 1532 entityExpansionIndex = fSecurityManager.getIndex(Constants.JDK_ENTITY_EXPANSION_LIMIT); 1533 1534 //StAX Property 1535 fSupportDTD = true; 1536 fReplaceEntityReferences = true; 1537 fSupportExternalEntities = true; 1538 1539 // JAXP 1.5 feature 1540 
XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 1541 if (spm == null) { 1542 spm = new XMLSecurityPropertyManager(); 1543 } 1544 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1545 1546 //reset general state 1547 reset(); 1548 1549 fEntityScanner.reset(componentManager); 1550 fEntityStorage.reset(componentManager); 1551 1552 } // reset(XMLComponentManager) 1553 1554 // reset general state. Should not be called other than by 1555 // a class acting as a component manager but not 1556 // implementing that interface for whatever reason. 1557 public void reset() { 1558 fLimitAnalyzer = new XMLLimitAnalyzer(); 1559 // initialize state 1560 fStandalone = false; 1561 fEntities.clear(); 1562 fEntityStack.removeAllElements(); 1563 fEntityExpansionCount = 0; 1564 1565 fCurrentEntity = null; 1566 // reset scanner 1567 if(fXML10EntityScanner != null){ 1568 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1569 } 1570 if(fXML11EntityScanner != null) { 1571 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1572 } 1573 1574 // DEBUG 1575 if (DEBUG_ENTITIES) { 1576 addInternalEntity("text", "Hello, World."); 1577 addInternalEntity("empty-element", "<foo/>"); 1578 addInternalEntity("balanced-element", "<foo></foo>"); 1579 addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 1580 addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 1581 addInternalEntity("unbalanced-entity", "<foo>"); 1582 addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 1583 addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 1584 addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 1585 try { 1586 addExternalEntity("external-text", null, "external-text.ent", "test/external-text.xml"); 1587 addExternalEntity("external-balanced-element", null, 
"external-balanced-element.ent", "test/external-balanced-element.xml"); 1588 addExternalEntity("one", null, "ent/one.ent", "test/external-entity.xml"); 1589 addExternalEntity("two", null, "ent/two.ent", "test/ent/one.xml"); 1590 } 1591 catch (IOException ex) { 1592 // should never happen 1593 } 1594 } 1595 1596 fEntityHandler = null; 1597 1598 // reset scanner 1599 //if(fEntityScanner!=null) 1600 // fEntityScanner.reset(fSymbolTable, this,fErrorReporter); 1601 1602 } 1603 /** 1604 * Returns a list of feature identifiers that are recognized by 1605 * this component. This method may return null if no features 1606 * are recognized by this component. 1607 */ 1608 public String[] getRecognizedFeatures() { 1609 return (String[])(RECOGNIZED_FEATURES.clone()); 1610 } // getRecognizedFeatures():String[] 1611 1612 /** 1613 * Sets the state of a feature. This method is called by the component 1614 * manager any time after reset when a feature changes state. 1615 * <p> 1616 * <strong>Note:</strong> Components should silently ignore features 1617 * that do not affect the operation of the component. 1618 * 1619 * @param featureId The feature identifier. 1620 * @param state The state of the feature. 1621 * 1622 * @throws SAXNotRecognizedException The component should not throw 1623 * this exception. 1624 * @throws SAXNotSupportedException The component should not throw 1625 * this exception. 
1626 */ 1627 public void setFeature(String featureId, boolean state) 1628 throws XMLConfigurationException { 1629 1630 // xerces features 1631 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 1632 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 1633 if (suffixLength == Constants.ALLOW_JAVA_ENCODINGS_FEATURE.length() && 1634 featureId.endsWith(Constants.ALLOW_JAVA_ENCODINGS_FEATURE)) { 1635 fAllowJavaEncodings = state; 1636 } 1637 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 1638 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 1639 fLoadExternalDTD = state; 1640 return; 1641 } 1642 } 1643 1644 } // setFeature(String,boolean) 1645 1646 /** 1647 * Sets the value of a property. This method is called by the component 1648 * manager any time after reset when a property changes value. 1649 * <p> 1650 * <strong>Note:</strong> Components should silently ignore properties 1651 * that do not affect the operation of the component. 1652 * 1653 * @param propertyId The property identifier. 1654 * @param value The value of the property. 1655 * 1656 * @throws SAXNotRecognizedException The component should not throw 1657 * this exception. 1658 * @throws SAXNotSupportedException The component should not throw 1659 * this exception. 
1660 */ 1661 public void setProperty(String propertyId, Object value){ 1662 // Xerces properties 1663 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 1664 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 1665 1666 if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() && 1667 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) { 1668 fSymbolTable = (SymbolTable)value; 1669 return; 1670 } 1671 if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() && 1672 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) { 1673 fErrorReporter = (XMLErrorReporter)value; 1674 return; 1675 } 1676 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 1677 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 1678 fEntityResolver = (XMLEntityResolver)value; 1679 return; 1680 } 1681 if (suffixLength == Constants.BUFFER_SIZE_PROPERTY.length() && 1682 propertyId.endsWith(Constants.BUFFER_SIZE_PROPERTY)) { 1683 Integer bufferSize = (Integer)value; 1684 if (bufferSize != null && 1685 bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) { 1686 fBufferSize = bufferSize.intValue(); 1687 fEntityScanner.setBufferSize(fBufferSize); 1688 fBufferPool.setExternalBufferSize(fBufferSize); 1689 } 1690 } 1691 if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() && 1692 propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) { 1693 fSecurityManager = (XMLSecurityManager)value; 1694 } 1695 } 1696 1697 //JAXP 1.5 properties 1698 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 1699 { 1700 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 1701 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1702 } 1703 } 1704 1705 public void setLimitAnalyzer(XMLLimitAnalyzer fLimitAnalyzer) { 1706 this.fLimitAnalyzer = fLimitAnalyzer; 1707 } 1708 1709 /** 1710 * Returns a list of property identifiers that are recognized by 1711 * this component. 
This method may return null if no properties 1712 * are recognized by this component. 1713 */ 1714 public String[] getRecognizedProperties() { 1715 return (String[])(RECOGNIZED_PROPERTIES.clone()); 1716 } // getRecognizedProperties():String[] 1717 /** 1718 * Returns the default state for a feature, or null if this 1719 * component does not want to report a default value for this 1720 * feature. 1721 * 1722 * @param featureId The feature identifier. 1723 * 1724 * @since Xerces 2.2.0 1725 */ 1726 public Boolean getFeatureDefault(String featureId) { 1727 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 1728 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 1729 return FEATURE_DEFAULTS[i]; 1730 } 1731 } 1732 return null; 1733 } // getFeatureDefault(String):Boolean 1734 1735 /** 1736 * Returns the default state for a property, or null if this 1737 * component does not want to report a default value for this 1738 * property. 1739 * 1740 * @param propertyId The property identifier. 1741 * 1742 * @since Xerces 2.2.0 1743 */ 1744 public Object getPropertyDefault(String propertyId) { 1745 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 1746 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 1747 return PROPERTY_DEFAULTS[i]; 1748 } 1749 } 1750 return null; 1751 } // getPropertyDefault(String):Object 1752 1753 // 1754 // Public static methods 1755 // 1756 1757 /** 1758 * Expands a system id and returns the system id as a URI, if 1759 * it can be expanded. A return value of null means that the 1760 * identifier is already expanded. An exception thrown 1761 * indicates a failure to expand the id. 1762 * 1763 * @param systemId The systemId to be expanded. 1764 * 1765 * @return Returns the URI string representing the expanded system 1766 * identifier. A null value indicates that the given 1767 * system identifier is already expanded. 
1768 * 1769 */ 1770 public static String expandSystemId(String systemId) { 1771 return expandSystemId(systemId, null); 1772 } // expandSystemId(String):String 1773 1774 // 1775 // Public static methods 1776 // 1777 1778 // current value of the "user.dir" property 1779 private static String gUserDir; 1780 // cached URI object for the current value of the escaped "user.dir" property stored as a URI 1781 private static URI gUserDirURI; 1782 // which ASCII characters need to be escaped 1783 private static boolean gNeedEscaping[] = new boolean[128]; 1784 // the first hex character if a character needs to be escaped 1785 private static char gAfterEscaping1[] = new char[128]; 1786 // the second hex character if a character needs to be escaped 1787 private static char gAfterEscaping2[] = new char[128]; 1788 private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7', 1789 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; 1790 // initialize the above 3 arrays 1791 static { 1792 for (int i = 0; i <= 0x1f; i++) { 1793 gNeedEscaping[i] = true; 1794 gAfterEscaping1[i] = gHexChs[i >> 4]; 1795 gAfterEscaping2[i] = gHexChs[i & 0xf]; 1796 } 1797 gNeedEscaping[0x7f] = true; 1798 gAfterEscaping1[0x7f] = '7'; 1799 gAfterEscaping2[0x7f] = 'F'; 1800 char[] escChs = {' ', '<', '>', '#', '%', '"', '{', '}', 1801 '|', '\\', '^', '~', '[', ']', '`'}; 1802 int len = escChs.length; 1803 char ch; 1804 for (int i = 0; i < len; i++) { 1805 ch = escChs[i]; 1806 gNeedEscaping[ch] = true; 1807 gAfterEscaping1[ch] = gHexChs[ch >> 4]; 1808 gAfterEscaping2[ch] = gHexChs[ch & 0xf]; 1809 } 1810 } 1811 1812 // To escape the "user.dir" system property, by using %HH to represent 1813 // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', '#', '%' 1814 // and '"'. It's a static method, so needs to be synchronized. 1815 // this method looks heavy, but since the system property isn't expected 1816 // to change often, so in most cases, we only need to return the URI 1817 // that was escaped before. 
1818 // According to the URI spec, non-ASCII characters (whose value >= 128) 1819 // need to be escaped too. 1820 // REVISIT: don't know how to escape non-ASCII characters, especially 1821 // which encoding to use. Leave them for now. 1822 private static synchronized URI getUserDir() throws URI.MalformedURIException { 1823 // get the user.dir property 1824 String userDir = ""; 1825 try { 1826 userDir = SecuritySupport.getSystemProperty("user.dir"); 1827 } 1828 catch (SecurityException se) { 1829 } 1830 1831 // return empty string if property value is empty string. 1832 if (userDir.length() == 0) 1833 return new URI("file", "", "", null, null); 1834 // compute the new escaped value if the new property value doesn't 1835 // match the previous one 1836 if (gUserDirURI != null && userDir.equals(gUserDir)) { 1837 return gUserDirURI; 1838 } 1839 1840 // record the new value as the global property value 1841 gUserDir = userDir; 1842 1843 char separator = java.io.File.separatorChar; 1844 userDir = userDir.replace(separator, '/'); 1845 1846 int len = userDir.length(), ch; 1847 StringBuffer buffer = new StringBuffer(len*3); 1848 // change C:/blah to /C:/blah 1849 if (len >= 2 && userDir.charAt(1) == ':') { 1850 ch = Character.toUpperCase(userDir.charAt(0)); 1851 if (ch >= 'A' && ch <= 'Z') { 1852 buffer.append('/'); 1853 } 1854 } 1855 1856 // for each character in the path 1857 int i = 0; 1858 for (; i < len; i++) { 1859 ch = userDir.charAt(i); 1860 // if it's not an ASCII character, break here, and use UTF-8 encoding 1861 if (ch >= 128) 1862 break; 1863 if (gNeedEscaping[ch]) { 1864 buffer.append('%'); 1865 buffer.append(gAfterEscaping1[ch]); 1866 buffer.append(gAfterEscaping2[ch]); 1867 // record the fact that it's escaped 1868 } 1869 else { 1870 buffer.append((char)ch); 1871 } 1872 } 1873 1874 // we saw some non-ascii character 1875 if (i < len) { 1876 // get UTF-8 bytes for the remaining sub-string 1877 byte[] bytes = null; 1878 byte b; 1879 try { 1880 bytes = 
userDir.substring(i).getBytes("UTF-8"); 1881 } catch (java.io.UnsupportedEncodingException e) { 1882 // should never happen 1883 return new URI("file", "", userDir, null, null); 1884 } 1885 len = bytes.length; 1886 1887 // for each byte 1888 for (i = 0; i < len; i++) { 1889 b = bytes[i]; 1890 // for non-ascii character: make it positive, then escape 1891 if (b < 0) { 1892 ch = b + 256; 1893 buffer.append('%'); 1894 buffer.append(gHexChs[ch >> 4]); 1895 buffer.append(gHexChs[ch & 0xf]); 1896 } 1897 else if (gNeedEscaping[b]) { 1898 buffer.append('%'); 1899 buffer.append(gAfterEscaping1[b]); 1900 buffer.append(gAfterEscaping2[b]); 1901 } 1902 else { 1903 buffer.append((char)b); 1904 } 1905 } 1906 } 1907 1908 // change blah/blah to blah/blah/ 1909 if (!userDir.endsWith("/")) 1910 buffer.append('/'); 1911 1912 gUserDirURI = new URI("file", "", buffer.toString(), null, null); 1913 1914 return gUserDirURI; 1915 } 1916 1917 /** 1918 * Absolutizes a URI using the current value 1919 * of the "user.dir" property as the base URI. If 1920 * the URI is already absolute, this is a no-op. 1921 * 1922 * @param uri the URI to absolutize 1923 */ 1924 public static void absolutizeAgainstUserDir(URI uri) 1925 throws URI.MalformedURIException { 1926 uri.absolutize(getUserDir()); 1927 } 1928 1929 /** 1930 * Expands a system id and returns the system id as a URI, if 1931 * it can be expanded. A return value of null means that the 1932 * identifier is already expanded. An exception thrown 1933 * indicates a failure to expand the id. 1934 * 1935 * @param systemId The systemId to be expanded. 1936 * 1937 * @return Returns the URI string representing the expanded system 1938 * identifier. A null value indicates that the given 1939 * system identifier is already expanded. 
1940 * 1941 */ 1942 public static String expandSystemId(String systemId, String baseSystemId) { 1943 1944 // check for bad parameters id 1945 if (systemId == null || systemId.length() == 0) { 1946 return systemId; 1947 } 1948 // if id already expanded, return 1949 try { 1950 URI uri = new URI(systemId); 1951 if (uri != null) { 1952 return systemId; 1953 } 1954 } catch (URI.MalformedURIException e) { 1955 // continue on... 1956 } 1957 // normalize id 1958 String id = fixURI(systemId); 1959 1960 // normalize base 1961 URI base = null; 1962 URI uri = null; 1963 try { 1964 if (baseSystemId == null || baseSystemId.length() == 0 || 1965 baseSystemId.equals(systemId)) { 1966 String dir = getUserDir().toString(); 1967 base = new URI("file", "", dir, null, null); 1968 } else { 1969 try { 1970 base = new URI(fixURI(baseSystemId)); 1971 } catch (URI.MalformedURIException e) { 1972 if (baseSystemId.indexOf(':') != -1) { 1973 // for xml schemas we might have baseURI with 1974 // a specified drive 1975 base = new URI("file", "", fixURI(baseSystemId), null, null); 1976 } else { 1977 String dir = getUserDir().toString(); 1978 dir = dir + fixURI(baseSystemId); 1979 base = new URI("file", "", dir, null, null); 1980 } 1981 } 1982 } 1983 // expand id 1984 uri = new URI(base, id); 1985 } catch (Exception e) { 1986 // let it go through 1987 1988 } 1989 1990 if (uri == null) { 1991 return systemId; 1992 } 1993 return uri.toString(); 1994 1995 } // expandSystemId(String,String):String 1996 1997 /** 1998 * Expands a system id and returns the system id as a URI, if 1999 * it can be expanded. A return value of null means that the 2000 * identifier is already expanded. An exception thrown 2001 * indicates a failure to expand the id. 2002 * 2003 * @param systemId The systemId to be expanded. 2004 * 2005 * @return Returns the URI string representing the expanded system 2006 * identifier. A null value indicates that the given 2007 * system identifier is already expanded. 
2008 * 2009 */ 2010 public static String expandSystemId(String systemId, String baseSystemId, 2011 boolean strict) 2012 throws URI.MalformedURIException { 2013 2014 // check if there is a system id before 2015 // trying to expand it. 2016 if (systemId == null) { 2017 return null; 2018 } 2019 2020 // system id has to be a valid URI 2021 if (strict) { 2022 try { 2023 // if it's already an absolute one, return it 2024 new URI(systemId); 2025 return systemId; 2026 } 2027 catch (URI.MalformedURIException ex) { 2028 } 2029 URI base = null; 2030 // if there isn't a base uri, use the working directory 2031 if (baseSystemId == null || baseSystemId.length() == 0) { 2032 base = new URI("file", "", getUserDir().toString(), null, null); 2033 } 2034 // otherwise, use the base uri 2035 else { 2036 try { 2037 base = new URI(baseSystemId); 2038 } 2039 catch (URI.MalformedURIException e) { 2040 // assume "base" is also a relative uri 2041 String dir = getUserDir().toString(); 2042 dir = dir + baseSystemId; 2043 base = new URI("file", "", dir, null, null); 2044 } 2045 } 2046 // absolutize the system id using the base 2047 URI uri = new URI(base, systemId); 2048 // return the string rep of the new uri (an absolute one) 2049 return uri.toString(); 2050 2051 // if any exception is thrown, it'll get thrown to the caller. 2052 } 2053 2054 // Assume the URIs are well-formed. If it turns out they're not, try fixing them up. 2055 try { 2056 return expandSystemIdStrictOff(systemId, baseSystemId); 2057 } 2058 catch (URI.MalformedURIException e) { 2059 /** Xerces URI rejects unicode, try java.net.URI 2060 * this is not ideal solution, but it covers known cases which either 2061 * Xerces URI or java.net.URI can handle alone 2062 * will file bug against java.net.URI 2063 */ 2064 try { 2065 return expandSystemIdStrictOff1(systemId, baseSystemId); 2066 } catch (URISyntaxException ex) { 2067 // continue on... 
2068 } 2069 } 2070 // check for bad parameters id 2071 if (systemId.length() == 0) { 2072 return systemId; 2073 } 2074 2075 // normalize id 2076 String id = fixURI(systemId); 2077 2078 // normalize base 2079 URI base = null; 2080 URI uri = null; 2081 try { 2082 if (baseSystemId == null || baseSystemId.length() == 0 || 2083 baseSystemId.equals(systemId)) { 2084 base = getUserDir(); 2085 } 2086 else { 2087 try { 2088 base = new URI(fixURI(baseSystemId).trim()); 2089 } 2090 catch (URI.MalformedURIException e) { 2091 if (baseSystemId.indexOf(':') != -1) { 2092 // for xml schemas we might have baseURI with 2093 // a specified drive 2094 base = new URI("file", "", fixURI(baseSystemId).trim(), null, null); 2095 } 2096 else { 2097 base = new URI(getUserDir(), fixURI(baseSystemId)); 2098 } 2099 } 2100 } 2101 // expand id 2102 uri = new URI(base, id.trim()); 2103 } 2104 catch (Exception e) { 2105 // let it go through 2106 2107 } 2108 2109 if (uri == null) { 2110 return systemId; 2111 } 2112 return uri.toString(); 2113 2114 } // expandSystemId(String,String,boolean):String 2115 2116 /** 2117 * Helper method for expandSystemId(String,String,boolean):String 2118 */ 2119 private static String expandSystemIdStrictOn(String systemId, String baseSystemId) 2120 throws URI.MalformedURIException { 2121 2122 URI systemURI = new URI(systemId, true); 2123 // If it's already an absolute one, return it 2124 if (systemURI.isAbsoluteURI()) { 2125 return systemId; 2126 } 2127 2128 // If there isn't a base URI, use the working directory 2129 URI baseURI = null; 2130 if (baseSystemId == null || baseSystemId.length() == 0) { 2131 baseURI = getUserDir(); 2132 } 2133 else { 2134 baseURI = new URI(baseSystemId, true); 2135 if (!baseURI.isAbsoluteURI()) { 2136 // assume "base" is also a relative uri 2137 baseURI.absolutize(getUserDir()); 2138 } 2139 } 2140 2141 // absolutize the system identifier using the base URI 2142 systemURI.absolutize(baseURI); 2143 2144 // return the string rep of the new uri 
(an absolute one) 2145 return systemURI.toString(); 2146 2147 // if any exception is thrown, it'll get thrown to the caller. 2148 2149 } // expandSystemIdStrictOn(String,String):String 2150 2151 /** 2152 * Attempt to set whether redirects will be followed for an <code>HttpURLConnection</code>. 2153 * This may fail on earlier JDKs which do not support setting this preference. 2154 */ 2155 public static void setInstanceFollowRedirects(HttpURLConnection urlCon, boolean followRedirects) { 2156 try { 2157 Method method = HttpURLConnection.class.getMethod("setInstanceFollowRedirects", new Class[] {Boolean.TYPE}); 2158 method.invoke(urlCon, new Object[] {followRedirects ? Boolean.TRUE : Boolean.FALSE}); 2159 } 2160 // setInstanceFollowRedirects doesn't exist. 2161 catch (Exception exc) {} 2162 } 2163 2164 2165 /** 2166 * Helper method for expandSystemId(String,String,boolean):String 2167 */ 2168 private static String expandSystemIdStrictOff(String systemId, String baseSystemId) 2169 throws URI.MalformedURIException { 2170 2171 URI systemURI = new URI(systemId, true); 2172 // If it's already an absolute one, return it 2173 if (systemURI.isAbsoluteURI()) { 2174 if (systemURI.getScheme().length() > 1) { 2175 return systemId; 2176 } 2177 /** 2178 * If the scheme's length is only one character, 2179 * it's likely that this was intended as a file 2180 * path. Fixing this up in expandSystemId to 2181 * maintain backwards compatibility. 
2182 */ 2183 throw new URI.MalformedURIException(); 2184 } 2185 2186 // If there isn't a base URI, use the working directory 2187 URI baseURI = null; 2188 if (baseSystemId == null || baseSystemId.length() == 0) { 2189 baseURI = getUserDir(); 2190 } 2191 else { 2192 baseURI = new URI(baseSystemId, true); 2193 if (!baseURI.isAbsoluteURI()) { 2194 // assume "base" is also a relative uri 2195 baseURI.absolutize(getUserDir()); 2196 } 2197 } 2198 2199 // absolutize the system identifier using the base URI 2200 systemURI.absolutize(baseURI); 2201 2202 // return the string rep of the new uri (an absolute one) 2203 return systemURI.toString(); 2204 2205 // if any exception is thrown, it'll get thrown to the caller. 2206 2207 } // expandSystemIdStrictOff(String,String):String 2208 2209 private static String expandSystemIdStrictOff1(String systemId, String baseSystemId) 2210 throws URISyntaxException, URI.MalformedURIException { 2211 2212 java.net.URI systemURI = new java.net.URI(systemId); 2213 // If it's already an absolute one, return it 2214 if (systemURI.isAbsolute()) { 2215 if (systemURI.getScheme().length() > 1) { 2216 return systemId; 2217 } 2218 /** 2219 * If the scheme's length is only one character, 2220 * it's likely that this was intended as a file 2221 * path. Fixing this up in expandSystemId to 2222 * maintain backwards compatibility. 
2223 */ 2224 throw new URISyntaxException(systemId, "the scheme's length is only one character"); 2225 } 2226 2227 // If there isn't a base URI, use the working directory 2228 URI baseURI = null; 2229 if (baseSystemId == null || baseSystemId.length() == 0) { 2230 baseURI = getUserDir(); 2231 } 2232 else { 2233 baseURI = new URI(baseSystemId, true); 2234 if (!baseURI.isAbsoluteURI()) { 2235 // assume "base" is also a relative uri 2236 baseURI.absolutize(getUserDir()); 2237 } 2238 } 2239 2240 // absolutize the system identifier using the base URI 2241 // systemURI.absolutize(baseURI); 2242 systemURI = (new java.net.URI(baseURI.toString())).resolve(systemURI); 2243 2244 // return the string rep of the new uri (an absolute one) 2245 return systemURI.toString(); 2246 2247 // if any exception is thrown, it'll get thrown to the caller. 2248 2249 } // expandSystemIdStrictOff(String,String):String 2250 2251 // 2252 // Protected methods 2253 // 2254 2255 2256 /** 2257 * Returns the IANA encoding name that is auto-detected from 2258 * the bytes specified, with the endian-ness of that encoding where appropriate. 2259 * 2260 * @param b4 The first four bytes of the input. 2261 * @param count The number of bytes actually read. 2262 * @return a 2-element array: the first element, an IANA-encoding string, 2263 * the second element a Boolean which is true iff the document is big endian, false 2264 * if it's little-endian, and null if the distinction isn't relevant. 
2265 */ 2266 protected Object[] getEncodingName(byte[] b4, int count) { 2267 2268 if (count < 2) { 2269 return defaultEncoding; 2270 } 2271 2272 // UTF-16, with BOM 2273 int b0 = b4[0] & 0xFF; 2274 int b1 = b4[1] & 0xFF; 2275 if (b0 == 0xFE && b1 == 0xFF) { 2276 // UTF-16, big-endian 2277 return new Object [] {"UTF-16BE", new Boolean(true)}; 2278 } 2279 if (b0 == 0xFF && b1 == 0xFE) { 2280 // UTF-16, little-endian 2281 return new Object [] {"UTF-16LE", new Boolean(false)}; 2282 } 2283 2284 // default to UTF-8 if we don't have enough bytes to make a 2285 // good determination of the encoding 2286 if (count < 3) { 2287 return defaultEncoding; 2288 } 2289 2290 // UTF-8 with a BOM 2291 int b2 = b4[2] & 0xFF; 2292 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 2293 return defaultEncoding; 2294 } 2295 2296 // default to UTF-8 if we don't have enough bytes to make a 2297 // good determination of the encoding 2298 if (count < 4) { 2299 return defaultEncoding; 2300 } 2301 2302 // other encodings 2303 int b3 = b4[3] & 0xFF; 2304 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 2305 // UCS-4, big endian (1234) 2306 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 2307 } 2308 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 2309 // UCS-4, little endian (4321) 2310 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 2311 } 2312 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 2313 // UCS-4, unusual octet order (2143) 2314 // REVISIT: What should this be? 2315 return new Object [] {"ISO-10646-UCS-4", null}; 2316 } 2317 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 2318 // UCS-4, unusual octect order (3412) 2319 // REVISIT: What should this be? 2320 return new Object [] {"ISO-10646-UCS-4", null}; 2321 } 2322 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 2323 // UTF-16, big-endian, no BOM 2324 // (or could turn out to be UCS-2... 2325 // REVISIT: What should this be? 
2326 return new Object [] {"UTF-16BE", new Boolean(true)}; 2327 } 2328 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 2329 // UTF-16, little-endian, no BOM 2330 // (or could turn out to be UCS-2... 2331 return new Object [] {"UTF-16LE", new Boolean(false)}; 2332 } 2333 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2334 // EBCDIC 2335 // a la xerces1, return CP037 instead of EBCDIC here 2336 return new Object [] {"CP037", null}; 2337 } 2338 2339 return defaultEncoding; 2340 2341 } // getEncodingName(byte[],int):Object[] 2342 2343 /** 2344 * Creates a reader capable of reading the given input stream in 2345 * the specified encoding. 2346 * 2347 * @param inputStream The input stream. 2348 * @param encoding The encoding name that the input stream is 2349 * encoded using. If the user has specified that 2350 * Java encoding names are allowed, then the 2351 * encoding name may be a Java encoding name; 2352 * otherwise, it is an ianaEncoding name. 2353 * @param isBigEndian For encodings (like uCS-4), whose names cannot 2354 * specify a byte order, this tells whether the order is bigEndian. null menas 2355 * unknown or not relevant. 2356 * 2357 * @return Returns a reader. 
2358 */ 2359 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 2360 throws IOException { 2361 2362 // normalize encoding name 2363 if (encoding == null) { 2364 encoding = "UTF-8"; 2365 } 2366 2367 // try to use an optimized reader 2368 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 2369 if (ENCODING.equals("UTF-8")) { 2370 if (DEBUG_ENCODINGS) { 2371 System.out.println("$$$ creating UTF8Reader"); 2372 } 2373 return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 2374 } 2375 if (ENCODING.equals("US-ASCII")) { 2376 if (DEBUG_ENCODINGS) { 2377 System.out.println("$$$ creating ASCIIReader"); 2378 } 2379 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 2380 } 2381 if(ENCODING.equals("ISO-10646-UCS-4")) { 2382 if(isBigEndian != null) { 2383 boolean isBE = isBigEndian.booleanValue(); 2384 if(isBE) { 2385 return new UCSReader(inputStream, UCSReader.UCS4BE); 2386 } else { 2387 return new UCSReader(inputStream, UCSReader.UCS4LE); 2388 } 2389 } else { 2390 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2391 "EncodingByteOrderUnsupported", 2392 new Object[] { encoding }, 2393 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2394 } 2395 } 2396 if(ENCODING.equals("ISO-10646-UCS-2")) { 2397 if(isBigEndian != null) { // sould never happen with this encoding... 
2398 boolean isBE = isBigEndian.booleanValue(); 2399 if(isBE) { 2400 return new UCSReader(inputStream, UCSReader.UCS2BE); 2401 } else { 2402 return new UCSReader(inputStream, UCSReader.UCS2LE); 2403 } 2404 } else { 2405 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2406 "EncodingByteOrderUnsupported", 2407 new Object[] { encoding }, 2408 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2409 } 2410 } 2411 2412 // check for valid name 2413 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 2414 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 2415 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 2416 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2417 "EncodingDeclInvalid", 2418 new Object[] { encoding }, 2419 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2420 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 2421 // because every byte is a valid ISO Latin 1 character. 2422 // It may not translate correctly but if we failed on 2423 // the encoding anyway, then we're expecting the content 2424 // of the document to be bad. This will just prevent an 2425 // invalid UTF-8 sequence to be detected. This is only 2426 // important when continue-after-fatal-error is turned 2427 // on. -Ac 2428 encoding = "ISO-8859-1"; 2429 } 2430 2431 // try to use a Java reader 2432 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 2433 if (javaEncoding == null) { 2434 if(fAllowJavaEncodings) { 2435 javaEncoding = encoding; 2436 } else { 2437 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2438 "EncodingDeclInvalid", 2439 new Object[] { encoding }, 2440 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2441 // see comment above. 
2442 javaEncoding = "ISO8859_1"; 2443 } 2444 } 2445 if (DEBUG_ENCODINGS) { 2446 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 2447 if (javaEncoding == encoding) { 2448 System.out.print(" (IANA encoding)"); 2449 } 2450 System.out.println(); 2451 } 2452 return new BufferedReader( new InputStreamReader(inputStream, javaEncoding)); 2453 2454 } // createReader(InputStream,String, Boolean): Reader 2455 2456 2457 /** 2458 * Return the public identifier for the current document event. 2459 * <p> 2460 * The return value is the public identifier of the document 2461 * entity or of the external parsed entity in which the markup 2462 * triggering the event appears. 2463 * 2464 * @return A string containing the public identifier, or 2465 * null if none is available. 2466 */ 2467 public String getPublicId() { 2468 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; 2469 } // getPublicId():String 2470 2471 /** 2472 * Return the expanded system identifier for the current document event. 2473 * <p> 2474 * The return value is the expanded system identifier of the document 2475 * entity or of the external parsed entity in which the markup 2476 * triggering the event appears. 2477 * <p> 2478 * If the system identifier is a URL, the parser must resolve it 2479 * fully before passing it to the application. 2480 * 2481 * @return A string containing the expanded system identifier, or null 2482 * if none is available. 
2483 */ 2484 public String getExpandedSystemId() { 2485 if (fCurrentEntity != null) { 2486 if (fCurrentEntity.entityLocation != null && 2487 fCurrentEntity.entityLocation.getExpandedSystemId() != null ) { 2488 return fCurrentEntity.entityLocation.getExpandedSystemId(); 2489 } else { 2490 // search for the first external entity on the stack 2491 int size = fEntityStack.size(); 2492 for (int i = size - 1; i >= 0 ; i--) { 2493 Entity.ScannedEntity externalEntity = 2494 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2495 2496 if (externalEntity.entityLocation != null && 2497 externalEntity.entityLocation.getExpandedSystemId() != null) { 2498 return externalEntity.entityLocation.getExpandedSystemId(); 2499 } 2500 } 2501 } 2502 } 2503 return null; 2504 } // getExpandedSystemId():String 2505 2506 /** 2507 * Return the literal system identifier for the current document event. 2508 * <p> 2509 * The return value is the literal system identifier of the document 2510 * entity or of the external parsed entity in which the markup 2511 * triggering the event appears. 2512 * <p> 2513 * @return A string containing the literal system identifier, or null 2514 * if none is available. 
2515 */ 2516 public String getLiteralSystemId() { 2517 if (fCurrentEntity != null) { 2518 if (fCurrentEntity.entityLocation != null && 2519 fCurrentEntity.entityLocation.getLiteralSystemId() != null ) { 2520 return fCurrentEntity.entityLocation.getLiteralSystemId(); 2521 } else { 2522 // search for the first external entity on the stack 2523 int size = fEntityStack.size(); 2524 for (int i = size - 1; i >= 0 ; i--) { 2525 Entity.ScannedEntity externalEntity = 2526 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2527 2528 if (externalEntity.entityLocation != null && 2529 externalEntity.entityLocation.getLiteralSystemId() != null) { 2530 return externalEntity.entityLocation.getLiteralSystemId(); 2531 } 2532 } 2533 } 2534 } 2535 return null; 2536 } // getLiteralSystemId():String 2537 2538 /** 2539 * Return the line number where the current document event ends. 2540 * <p> 2541 * <strong>Warning:</strong> The return value from the method 2542 * is intended only as an approximation for the sake of error 2543 * reporting; it is not intended to provide sufficient information 2544 * to edit the character content of the original XML document. 2545 * <p> 2546 * The return value is an approximation of the line number 2547 * in the document entity or external parsed entity where the 2548 * markup triggering the event appears. 2549 * <p> 2550 * If possible, the SAX driver should provide the line position 2551 * of the first character after the text associated with the document 2552 * event. The first line in the document is line 1. 2553 * 2554 * @return The line number, or -1 if none is available. 
2555 */ 2556 public int getLineNumber() { 2557 if (fCurrentEntity != null) { 2558 if (fCurrentEntity.isExternal()) { 2559 return fCurrentEntity.lineNumber; 2560 } else { 2561 // search for the first external entity on the stack 2562 int size = fEntityStack.size(); 2563 for (int i=size-1; i>0 ; i--) { 2564 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2565 if (firstExternalEntity.isExternal()) { 2566 return firstExternalEntity.lineNumber; 2567 } 2568 } 2569 } 2570 } 2571 2572 return -1; 2573 2574 } // getLineNumber():int 2575 2576 /** 2577 * Return the column number where the current document event ends. 2578 * <p> 2579 * <strong>Warning:</strong> The return value from the method 2580 * is intended only as an approximation for the sake of error 2581 * reporting; it is not intended to provide sufficient information 2582 * to edit the character content of the original XML document. 2583 * <p> 2584 * The return value is an approximation of the column number 2585 * in the document entity or external parsed entity where the 2586 * markup triggering the event appears. 2587 * <p> 2588 * If possible, the SAX driver should provide the line position 2589 * of the first character after the text associated with the document 2590 * event. 2591 * <p> 2592 * If possible, the SAX driver should provide the line position 2593 * of the first character after the text associated with the document 2594 * event. The first column in each line is column 1. 2595 * 2596 * @return The column number, or -1 if none is available. 
2597 */ 2598 public int getColumnNumber() { 2599 if (fCurrentEntity != null) { 2600 if (fCurrentEntity.isExternal()) { 2601 return fCurrentEntity.columnNumber; 2602 } else { 2603 // search for the first external entity on the stack 2604 int size = fEntityStack.size(); 2605 for (int i=size-1; i>0 ; i--) { 2606 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2607 if (firstExternalEntity.isExternal()) { 2608 return firstExternalEntity.columnNumber; 2609 } 2610 } 2611 } 2612 } 2613 2614 return -1; 2615 } // getColumnNumber():int 2616 2617 2618 // 2619 // Protected static methods 2620 // 2621 2622 /** 2623 * Fixes a platform dependent filename to standard URI form. 2624 * 2625 * @param str The string to fix. 2626 * 2627 * @return Returns the fixed URI string. 2628 */ 2629 protected static String fixURI(String str) { 2630 2631 // handle platform dependent strings 2632 str = str.replace(java.io.File.separatorChar, '/'); 2633 2634 // Windows fix 2635 if (str.length() >= 2) { 2636 char ch1 = str.charAt(1); 2637 // change "C:blah" to "/C:blah" 2638 if (ch1 == ':') { 2639 char ch0 = Character.toUpperCase(str.charAt(0)); 2640 if (ch0 >= 'A' && ch0 <= 'Z') { 2641 str = "/" + str; 2642 } 2643 } 2644 // change "//blah" to "file://blah" 2645 else if (ch1 == '/' && str.charAt(0) == '/') { 2646 str = "file:" + str; 2647 } 2648 } 2649 2650 // replace spaces in file names with %20. 2651 // Original comment from JDK5: the following algorithm might not be 2652 // very performant, but people who want to use invalid URI's have to 2653 // pay the price. 2654 int pos = str.indexOf(' '); 2655 if (pos >= 0) { 2656 StringBuilder sb = new StringBuilder(str.length()); 2657 // put characters before ' ' into the string builder 2658 for (int i = 0; i < pos; i++) 2659 sb.append(str.charAt(i)); 2660 // and %20 for the space 2661 sb.append("%20"); 2662 // for the remamining part, also convert ' ' to "%20". 
2663 for (int i = pos+1; i < str.length(); i++) { 2664 if (str.charAt(i) == ' ') 2665 sb.append("%20"); 2666 else 2667 sb.append(str.charAt(i)); 2668 } 2669 str = sb.toString(); 2670 } 2671 2672 // done 2673 return str; 2674 2675 } // fixURI(String):String 2676 2677 2678 // 2679 // Package visible methods 2680 // 2681 /** Prints the contents of the buffer. */ 2682 final void print() { 2683 if (DEBUG_BUFFER) { 2684 if (fCurrentEntity != null) { 2685 System.out.print('['); 2686 System.out.print(fCurrentEntity.count); 2687 System.out.print(' '); 2688 System.out.print(fCurrentEntity.position); 2689 if (fCurrentEntity.count > 0) { 2690 System.out.print(" \""); 2691 for (int i = 0; i < fCurrentEntity.count; i++) { 2692 if (i == fCurrentEntity.position) { 2693 System.out.print('^'); 2694 } 2695 char c = fCurrentEntity.ch[i]; 2696 switch (c) { 2697 case '\n': { 2698 System.out.print("\\n"); 2699 break; 2700 } 2701 case '\r': { 2702 System.out.print("\\r"); 2703 break; 2704 } 2705 case '\t': { 2706 System.out.print("\\t"); 2707 break; 2708 } 2709 case '\\': { 2710 System.out.print("\\\\"); 2711 break; 2712 } 2713 default: { 2714 System.out.print(c); 2715 } 2716 } 2717 } 2718 if (fCurrentEntity.position == fCurrentEntity.count) { 2719 System.out.print('^'); 2720 } 2721 System.out.print('"'); 2722 } 2723 System.out.print(']'); 2724 System.out.print(" @ "); 2725 System.out.print(fCurrentEntity.lineNumber); 2726 System.out.print(','); 2727 System.out.print(fCurrentEntity.columnNumber); 2728 } else { 2729 System.out.print("*NO CURRENT ENTITY*"); 2730 } 2731 } 2732 } // print() 2733 2734 /** 2735 * Buffer used in entity manager to reuse character arrays instead 2736 * of creating new ones every time. 
2737 * 2738 * @xerces.internal 2739 * 2740 * @author Ankit Pasricha, IBM 2741 */ 2742 private static class CharacterBuffer { 2743 2744 /** character buffer */ 2745 private char[] ch; 2746 2747 /** whether the buffer is for an external or internal scanned entity */ 2748 private boolean isExternal; 2749 2750 public CharacterBuffer(boolean isExternal, int size) { 2751 this.isExternal = isExternal; 2752 ch = new char[size]; 2753 } 2754 } 2755 2756 2757 /** 2758 * Stores a number of character buffers and provides it to the entity 2759 * manager to use when an entity is seen. 2760 * 2761 * @xerces.internal 2762 * 2763 * @author Ankit Pasricha, IBM 2764 */ 2765 private static class CharacterBufferPool { 2766 2767 private static final int DEFAULT_POOL_SIZE = 3; 2768 2769 private CharacterBuffer[] fInternalBufferPool; 2770 private CharacterBuffer[] fExternalBufferPool; 2771 2772 private int fExternalBufferSize; 2773 private int fInternalBufferSize; 2774 private int poolSize; 2775 2776 private int fInternalTop; 2777 private int fExternalTop; 2778 2779 public CharacterBufferPool(int externalBufferSize, int internalBufferSize) { 2780 this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize); 2781 } 2782 2783 public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) { 2784 fExternalBufferSize = externalBufferSize; 2785 fInternalBufferSize = internalBufferSize; 2786 this.poolSize = poolSize; 2787 init(); 2788 } 2789 2790 /** Initializes buffer pool. **/ 2791 private void init() { 2792 fInternalBufferPool = new CharacterBuffer[poolSize]; 2793 fExternalBufferPool = new CharacterBuffer[poolSize]; 2794 fInternalTop = -1; 2795 fExternalTop = -1; 2796 } 2797 2798 /** Retrieves buffer from pool. 
**/ 2799 public CharacterBuffer getBuffer(boolean external) { 2800 if (external) { 2801 if (fExternalTop > -1) { 2802 return (CharacterBuffer)fExternalBufferPool[fExternalTop--]; 2803 } 2804 else { 2805 return new CharacterBuffer(true, fExternalBufferSize); 2806 } 2807 } 2808 else { 2809 if (fInternalTop > -1) { 2810 return (CharacterBuffer)fInternalBufferPool[fInternalTop--]; 2811 } 2812 else { 2813 return new CharacterBuffer(false, fInternalBufferSize); 2814 } 2815 } 2816 } 2817 2818 /** Returns buffer to pool. **/ 2819 public void returnToPool(CharacterBuffer buffer) { 2820 if (buffer.isExternal) { 2821 if (fExternalTop < fExternalBufferPool.length - 1) { 2822 fExternalBufferPool[++fExternalTop] = buffer; 2823 } 2824 } 2825 else if (fInternalTop < fInternalBufferPool.length - 1) { 2826 fInternalBufferPool[++fInternalTop] = buffer; 2827 } 2828 } 2829 2830 /** Sets the size of external buffers and dumps the old pool. **/ 2831 public void setExternalBufferSize(int bufferSize) { 2832 fExternalBufferSize = bufferSize; 2833 fExternalBufferPool = new CharacterBuffer[poolSize]; 2834 fExternalTop = -1; 2835 } 2836 } 2837 2838 /** 2839 * This class wraps the byte inputstreams we're presented with. 2840 * We need it because java.io.InputStreams don't provide 2841 * functionality to reread processed bytes, and they have a habit 2842 * of reading more than one character when you call their read() 2843 * methods. This means that, once we discover the true (declared) 2844 * encoding of a document, we can neither backtrack to read the 2845 * whole doc again nor start reading where we are with a new 2846 * reader. 2847 * 2848 * This class allows rewinding an inputStream by allowing a mark 2849 * to be set, and the stream reset to that position. 
<strong>The 2850 * class assumes that it needs to read one character per 2851 * invocation when it's read() method is inovked, but uses the 2852 * underlying InputStream's read(char[], offset length) method--it 2853 * won't buffer data read this way!</strong> 2854 * 2855 * @xerces.internal 2856 * 2857 * @author Neil Graham, IBM 2858 * @author Glenn Marcy, IBM 2859 */ 2860 2861 protected final class RewindableInputStream extends InputStream { 2862 2863 private InputStream fInputStream; 2864 private byte[] fData; 2865 private int fStartOffset; 2866 private int fEndOffset; 2867 private int fOffset; 2868 private int fLength; 2869 private int fMark; 2870 2871 public RewindableInputStream(InputStream is) { 2872 fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE]; 2873 fInputStream = is; 2874 fStartOffset = 0; 2875 fEndOffset = -1; 2876 fOffset = 0; 2877 fLength = 0; 2878 fMark = 0; 2879 } 2880 2881 public void setStartOffset(int offset) { 2882 fStartOffset = offset; 2883 } 2884 2885 public void rewind() { 2886 fOffset = fStartOffset; 2887 } 2888 2889 public int read() throws IOException { 2890 int b = 0; 2891 if (fOffset < fLength) { 2892 return fData[fOffset++] & 0xff; 2893 } 2894 if (fOffset == fEndOffset) { 2895 return -1; 2896 } 2897 if (fOffset == fData.length) { 2898 byte[] newData = new byte[fOffset << 1]; 2899 System.arraycopy(fData, 0, newData, 0, fOffset); 2900 fData = newData; 2901 } 2902 b = fInputStream.read(); 2903 if (b == -1) { 2904 fEndOffset = fOffset; 2905 return -1; 2906 } 2907 fData[fLength++] = (byte)b; 2908 fOffset++; 2909 return b & 0xff; 2910 } 2911 2912 public int read(byte[] b, int off, int len) throws IOException { 2913 int bytesLeft = fLength - fOffset; 2914 if (bytesLeft == 0) { 2915 if (fOffset == fEndOffset) { 2916 return -1; 2917 } 2918 2919 /** 2920 * //System.out.println("fCurrentEntitty = " + fCurrentEntity ); 2921 * //System.out.println("fInputStream = " + fInputStream ); 2922 * // better get some more for the voracious reader... 
*/ 2923 2924 if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) { 2925 2926 if (!fCurrentEntity.xmlDeclChunkRead) 2927 { 2928 fCurrentEntity.xmlDeclChunkRead = true; 2929 len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE; 2930 } 2931 return fInputStream.read(b, off, len); 2932 } 2933 2934 int returnedVal = read(); 2935 if(returnedVal == -1) { 2936 fEndOffset = fOffset; 2937 return -1; 2938 } 2939 b[off] = (byte)returnedVal; 2940 return 1; 2941 2942 } 2943 if (len < bytesLeft) { 2944 if (len <= 0) { 2945 return 0; 2946 } 2947 } else { 2948 len = bytesLeft; 2949 } 2950 if (b != null) { 2951 System.arraycopy(fData, fOffset, b, off, len); 2952 } 2953 fOffset += len; 2954 return len; 2955 } 2956 2957 public long skip(long n) 2958 throws IOException { 2959 int bytesLeft; 2960 if (n <= 0) { 2961 return 0; 2962 } 2963 bytesLeft = fLength - fOffset; 2964 if (bytesLeft == 0) { 2965 if (fOffset == fEndOffset) { 2966 return 0; 2967 } 2968 return fInputStream.skip(n); 2969 } 2970 if (n <= bytesLeft) { 2971 fOffset += n; 2972 return n; 2973 } 2974 fOffset += bytesLeft; 2975 if (fOffset == fEndOffset) { 2976 return bytesLeft; 2977 } 2978 n -= bytesLeft; 2979 /* 2980 * In a manner of speaking, when this class isn't permitting more 2981 * than one byte at a time to be read, it is "blocking". The 2982 * available() method should indicate how much can be read without 2983 * blocking, so while we're in this mode, it should only indicate 2984 * that bytes in its buffer are available; otherwise, the result of 2985 * available() on the underlying InputStream is appropriate. 2986 */ 2987 return fInputStream.skip(n) + bytesLeft; 2988 } 2989 2990 public int available() throws IOException { 2991 int bytesLeft = fLength - fOffset; 2992 if (bytesLeft == 0) { 2993 if (fOffset == fEndOffset) { 2994 return -1; 2995 } 2996 return fCurrentEntity.mayReadChunks ? 
fInputStream.available() 2997 : 0; 2998 } 2999 return bytesLeft; 3000 } 3001 3002 public void mark(int howMuch) { 3003 fMark = fOffset; 3004 } 3005 3006 public void reset() { 3007 fOffset = fMark; 3008 //test(); 3009 } 3010 3011 public boolean markSupported() { 3012 return true; 3013 } 3014 3015 public void close() throws IOException { 3016 if (fInputStream != null) { 3017 fInputStream.close(); 3018 fInputStream = null; 3019 } 3020 } 3021 } // end of RewindableInputStream class 3022 3023 public void test(){ 3024 //System.out.println("TESTING: Added familytree to entityManager"); 3025 //Usecase1 3026 fEntityStorage.addExternalEntity("entityUsecase1",null, 3027 "/space/home/stax/sun/6thJan2004/zephyr/data/test.txt", 3028 "/space/home/stax/sun/6thJan2004/zephyr/data/entity.xml"); 3029 3030 //Usecase2 3031 fEntityStorage.addInternalEntity("entityUsecase2","<Test>value</Test>"); 3032 fEntityStorage.addInternalEntity("entityUsecase3","value3"); 3033 fEntityStorage.addInternalEntity("text", "Hello World."); 3034 fEntityStorage.addInternalEntity("empty-element", "<foo/>"); 3035 fEntityStorage.addInternalEntity("balanced-element", "<foo></foo>"); 3036 fEntityStorage.addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 3037 fEntityStorage.addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 3038 fEntityStorage.addInternalEntity("unbalanced-entity", "<foo>"); 3039 fEntityStorage.addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 3040 fEntityStorage.addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 3041 fEntityStorage.addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 3042 fEntityStorage.addInternalEntity("ch","©"); 3043 fEntityStorage.addInternalEntity("ch1","T"); 3044 fEntityStorage.addInternalEntity("% ch2","param"); 3045 } 3046 3047 } // class XMLEntityManager