1 /*
   2  * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl ;
  22 
  23 import com.sun.org.apache.xerces.internal.impl.Constants;
  24 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler;
  25 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
  26 import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
  27 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
  28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  29 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager;
  30 import com.sun.org.apache.xerces.internal.util.*;
  31 import com.sun.org.apache.xerces.internal.util.URI;
  32 import com.sun.org.apache.xerces.internal.utils.SecuritySupport;
  33 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer;
  34 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager;
  35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager;
  36 import com.sun.org.apache.xerces.internal.xni.Augmentations;
  37 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
  38 import com.sun.org.apache.xerces.internal.xni.XNIException;
  39 import com.sun.org.apache.xerces.internal.xni.parser.*;
  40 import com.sun.xml.internal.stream.Entity;
  41 import com.sun.xml.internal.stream.StaxEntityResolverWrapper;
  42 import com.sun.xml.internal.stream.StaxXMLInputSource;
  43 import com.sun.xml.internal.stream.XMLEntityStorage;
  44 import java.io.*;
  45 import java.lang.reflect.Method;
  46 import java.net.HttpURLConnection;
  47 import java.net.URISyntaxException;
  48 import java.net.URL;
  49 import java.net.URLConnection;
  50 import java.util.HashMap;
  51 import java.util.Iterator;
  52 import java.util.Locale;
  53 import java.util.Map;
  54 import java.util.Stack;
  55 import java.util.StringTokenizer;
  56 import javax.xml.stream.XMLInputFactory;
  57 
  58 
  59 /**
  60  * Will keep track of current entity.
  61  *
  62  * The entity manager handles the registration of general and parameter
  63  * entities; resolves entities; and starts entities. The entity manager
  64  * is a central component in a standard parser configuration and this
  65  * class works directly with the entity scanner to manage the underlying
  66  * xni.
  67  * <p>
  68  * This component requires the following features and properties from the
  69  * component manager that uses it:
  70  * <ul>
  71  *  <li>http://xml.org/sax/features/validation</li>
  72  *  <li>http://xml.org/sax/features/external-general-entities</li>
  73  *  <li>http://xml.org/sax/features/external-parameter-entities</li>
  74  *  <li>http://apache.org/xml/features/allow-java-encodings</li>
  75  *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
  76  *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
  77  *  <li>http://apache.org/xml/properties/internal/entity-resolver</li>
  78  * </ul>
  79  *
  80  *
  81  * @author Andy Clark, IBM
  82  * @author Arnaud  Le Hors, IBM
  83  * @author K.Venugopal SUN Microsystems
  84  * @author Neeraj Bajaj SUN Microsystems
  85  * @author Sunitha Reddy SUN Microsystems
  86  */
  87 public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
  88 
  89     //
  90     // Constants
  91     //
  92 
    /** Default buffer size (8192). */
  94     public static final int DEFAULT_BUFFER_SIZE = 8192;
  95 
    /** Default buffer size (64) used before we've finished with the XMLDecl. */
  97     public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
  98 
  99     /** Default internal entity buffer size (1024). */
 100     public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 1024;
 101 
 102     // feature identifiers
 103 
 104     /** Feature identifier: validation. */
 105     protected static final String VALIDATION =
 106             Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;
 107 
 108     /**
 109      * standard uri conformant (strict uri).
 110      * http://apache.org/xml/features/standard-uri-conformant
 111      */
 112     protected boolean fStrictURI;
 113 
 114 
 115     /** Feature identifier: external general entities. */
 116     protected static final String EXTERNAL_GENERAL_ENTITIES =
 117             Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE;
 118 
 119     /** Feature identifier: external parameter entities. */
 120     protected static final String EXTERNAL_PARAMETER_ENTITIES =
 121             Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE;
 122 
 123     /** Feature identifier: allow Java encodings. */
 124     protected static final String ALLOW_JAVA_ENCODINGS =
 125             Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;
 126 
 127     /** Feature identifier: warn on duplicate EntityDef */
 128     protected static final String WARN_ON_DUPLICATE_ENTITYDEF =
 129             Constants.XERCES_FEATURE_PREFIX +Constants.WARN_ON_DUPLICATE_ENTITYDEF_FEATURE;
 130 
 131     /** Feature identifier: load external DTD. */
 132     protected static final String LOAD_EXTERNAL_DTD =
 133             Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;
 134 
 135     // property identifiers
 136 
 137     /** Property identifier: symbol table. */
 138     protected static final String SYMBOL_TABLE =
 139             Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
 140 
 141     /** Property identifier: error reporter. */
 142     protected static final String ERROR_REPORTER =
 143             Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
 144 
 145     /** Feature identifier: standard uri conformant */
 146     protected static final String STANDARD_URI_CONFORMANT =
 147             Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;
 148 
 149     /** Property identifier: entity resolver. */
 150     protected static final String ENTITY_RESOLVER =
 151             Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;
 152 
 153     protected static final String STAX_ENTITY_RESOLVER =
 154             Constants.XERCES_PROPERTY_PREFIX + Constants.STAX_ENTITY_RESOLVER_PROPERTY;
 155 
 156     // property identifier:  ValidationManager
 157     protected static final String VALIDATION_MANAGER =
 158             Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;
 159 
 160     /** property identifier: buffer size. */
 161     protected static final String BUFFER_SIZE =
 162             Constants.XERCES_PROPERTY_PREFIX + Constants.BUFFER_SIZE_PROPERTY;
 163 
 164     /** property identifier: security manager. */
 165     protected static final String SECURITY_MANAGER =
 166         Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY;
 167 
 168     protected static final String PARSER_SETTINGS =
 169         Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
 170 
 171     /** Property identifier: Security property manager. */
 172     private static final String XML_SECURITY_PROPERTY_MANAGER =
 173             Constants.XML_SECURITY_PROPERTY_MANAGER;
 174 
 175     /** access external dtd: file protocol */
 176     static final String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;
 177 
 178     // recognized features and properties
 179 
 180     /** Recognized features. */
 181     private static final String[] RECOGNIZED_FEATURES = {
 182                 VALIDATION,
 183                 EXTERNAL_GENERAL_ENTITIES,
 184                 EXTERNAL_PARAMETER_ENTITIES,
 185                 ALLOW_JAVA_ENCODINGS,
 186                 WARN_ON_DUPLICATE_ENTITYDEF,
 187                 STANDARD_URI_CONFORMANT
 188     };
 189 
 190     /** Feature defaults. */
 191     private static final Boolean[] FEATURE_DEFAULTS = {
 192                 null,
 193                 Boolean.TRUE,
 194                 Boolean.TRUE,
 195                 Boolean.TRUE,
 196                 Boolean.FALSE,
 197                 Boolean.FALSE
 198     };
 199 
 200     /** Recognized properties. */
 201     private static final String[] RECOGNIZED_PROPERTIES = {
 202                 SYMBOL_TABLE,
 203                 ERROR_REPORTER,
 204                 ENTITY_RESOLVER,
 205                 VALIDATION_MANAGER,
 206                 BUFFER_SIZE,
 207                 SECURITY_MANAGER,
 208                 XML_SECURITY_PROPERTY_MANAGER
 209     };
 210 
 211     /** Property defaults. */
 212     private static final Object[] PROPERTY_DEFAULTS = {
 213                 null,
 214                 null,
 215                 null,
 216                 null,
 217                 new Integer(DEFAULT_BUFFER_SIZE),
 218                 null,
 219                 null
 220     };
 221 
 222     private static final String XMLEntity = "[xml]".intern();
 223     private static final String DTDEntity = "[dtd]".intern();
 224 
 225     // debugging
 226 
 227     /**
 228      * Debug printing of buffer. This debugging flag works best when you
 229      * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
 230      * 64 characters.
 231      */
 232     private static final boolean DEBUG_BUFFER = false;
 233 
 234     /** warn on duplicate Entity declaration.
 235      *  http://apache.org/xml/features/warn-on-duplicate-entitydef
 236      */
 237     protected boolean fWarnDuplicateEntityDef;
 238 
 239     /** Debug some basic entities. */
 240     private static final boolean DEBUG_ENTITIES = false;
 241 
 242     /** Debug switching readers for encodings. */
 243     private static final boolean DEBUG_ENCODINGS = false;
 244 
    // whether resolver trace messages should be displayed
 246     private static final boolean DEBUG_RESOLVER = false ;
 247 
 248     //
 249     // Data
 250     //
 251 
 252     // features
 253 
 254     /**
 255      * Validation. This feature identifier is:
 256      * http://xml.org/sax/features/validation
 257      */
 258     protected boolean fValidation;
 259 
 260     /**
 261      * External general entities. This feature identifier is:
 262      * http://xml.org/sax/features/external-general-entities
 263      */
 264     protected boolean fExternalGeneralEntities;
 265 
 266     /**
 267      * External parameter entities. This feature identifier is:
 268      * http://xml.org/sax/features/external-parameter-entities
 269      */
 270     protected boolean fExternalParameterEntities;
 271 
 272     /**
 273      * Allow Java encoding names. This feature identifier is:
 274      * http://apache.org/xml/features/allow-java-encodings
 275      */
 276     protected boolean fAllowJavaEncodings = true ;
 277 
 278     /** Load external DTD. */
 279     protected boolean fLoadExternalDTD = true;
 280 
 281     // properties
 282 
 283     /**
 284      * Symbol table. This property identifier is:
 285      * http://apache.org/xml/properties/internal/symbol-table
 286      */
 287     protected SymbolTable fSymbolTable;
 288 
 289     /**
 290      * Error reporter. This property identifier is:
 291      * http://apache.org/xml/properties/internal/error-reporter
 292      */
 293     protected XMLErrorReporter fErrorReporter;
 294 
 295     /**
 296      * Entity resolver. This property identifier is:
 297      * http://apache.org/xml/properties/internal/entity-resolver
 298      */
 299     protected XMLEntityResolver fEntityResolver;
 300 
 301     /** Stax Entity Resolver. This property identifier is XMLInputFactory.ENTITY_RESOLVER */
 302 
 303     protected StaxEntityResolverWrapper fStaxEntityResolver;
 304 
 305     /** Property Manager. This is used from Stax */
 306     protected PropertyManager fPropertyManager ;
 307 
 308     /** StAX properties */
 309     boolean fSupportDTD = true;
 310     boolean fReplaceEntityReferences = true;
 311     boolean fSupportExternalEntities = true;
 312 
 313     /** used to restrict external access */
 314     protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;
 315 
 316     // settings
 317 
 318     /**
 319      * Validation manager. This property identifier is:
 320      * http://apache.org/xml/properties/internal/validation-manager
 321      */
 322     protected ValidationManager fValidationManager;
 323 
 324     // settings
 325 
 326     /**
 327      * Buffer size. We get this value from a property. The default size
 328      * is used if the input buffer size property is not specified.
 329      * REVISIT: do we need a property for internal entity buffer size?
 330      */
 331     protected int fBufferSize = DEFAULT_BUFFER_SIZE;
 332 
 333     /** Security Manager */
 334     protected XMLSecurityManager fSecurityManager = null;
 335 
 336     protected XMLLimitAnalyzer fLimitAnalyzer = null;
 337 
 338     protected int entityExpansionIndex;
 339 
 340     /**
 341      * True if the document entity is standalone. This should really
 342      * only be set by the document source (e.g. XMLDocumentScanner).
 343      */
 344     protected boolean fStandalone;
 345 
 346     // are the entities being parsed in the external subset?
 347     // NOTE:  this *is not* the same as whether they're external entities!
 348     protected boolean fInExternalSubset = false;
 349 
 350 
 351     // handlers
 352     /** Entity handler. */
 353     protected XMLEntityHandler fEntityHandler;
 354 
 355     /** Current entity scanner */
 356     protected XMLEntityScanner fEntityScanner ;
 357 
 358     /** XML 1.0 entity scanner. */
 359     protected XMLEntityScanner fXML10EntityScanner;
 360 
 361     /** XML 1.1 entity scanner. */
 362     protected XMLEntityScanner fXML11EntityScanner;
 363 
 364     /** count of entities expanded: */
 365     protected int fEntityExpansionCount = 0;
 366 
 367     // entities
 368 
 369     /** Entities. */
 370     protected Map<String, Entity> fEntities = new HashMap<>();
 371 
 372     /** Entity stack. */
 373     protected Stack fEntityStack = new Stack();
 374 
 375     /** Current entity. */
 376     protected Entity.ScannedEntity fCurrentEntity = null;
 377 
 378     /** identify if the InputSource is created by a resolver */
 379     boolean fISCreatedByResolver = false;
 380 
 381     // shared context
 382 
 383     protected XMLEntityStorage fEntityStorage ;
 384 
 385     protected final Object [] defaultEncoding = new Object[]{"UTF-8", null};
 386 
 387 
 388     // temp vars
 389 
    /** Resource identifier. */
 391     private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
 392 
 393     /** Augmentations for entities. */
 394     private final Augmentations fEntityAugs = new AugmentationsImpl();
 395 
 396     /** Pool of character buffers. */
 397     private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE);
 398 
 399     //
 400     // Constructors
 401     //
 402 
 403     /**
 404      * If this constructor is used to create the object, reset() should be invoked on this object
 405      */
 406     public XMLEntityManager() {
 407         //for entity managers not created by parsers
 408         fSecurityManager = new XMLSecurityManager(true);
 409         fEntityStorage = new XMLEntityStorage(this) ;
 410         setScannerVersion(Constants.XML_VERSION_1_0);
 411     } // <init>()
 412 
 413     /** Default constructor. */
 414     public XMLEntityManager(PropertyManager propertyManager) {
 415         fPropertyManager = propertyManager ;
 416         //pass a reference to current entity being scanned
 417         //fEntityStorage = new XMLEntityStorage(fCurrentEntity) ;
 418         fEntityStorage = new XMLEntityStorage(this) ;
 419         fEntityScanner = new XMLEntityScanner(propertyManager, this) ;
 420         reset(propertyManager);
 421     } // <init>()
 422 
 423     /**
 424      * Adds an internal entity declaration.
 425      * <p>
 426      * <strong>Note:</strong> This method ignores subsequent entity
 427      * declarations.
 428      * <p>
 429      * <strong>Note:</strong> The name should be a unique symbol. The
 430      * SymbolTable can be used for this purpose.
 431      *
 432      * @param name The name of the entity.
 433      * @param text The text of the entity.
 434      *
 435      * @see SymbolTable
 436      */
 437     public void addInternalEntity(String name, String text) {
 438         if (!fEntities.containsKey(name)) {
 439             Entity entity = new Entity.InternalEntity(name, text, fInExternalSubset);
 440             fEntities.put(name, entity);
 441         } else{
 442             if(fWarnDuplicateEntityDef){
 443                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 444                         "MSG_DUPLICATE_ENTITY_DEFINITION",
 445                         new Object[]{ name },
 446                         XMLErrorReporter.SEVERITY_WARNING );
 447             }
 448         }
 449 
 450     } // addInternalEntity(String,String)
 451 
 452     /**
 453      * Adds an external entity declaration.
 454      * <p>
 455      * <strong>Note:</strong> This method ignores subsequent entity
 456      * declarations.
 457      * <p>
 458      * <strong>Note:</strong> The name should be a unique symbol. The
 459      * SymbolTable can be used for this purpose.
 460      *
 461      * @param name         The name of the entity.
 462      * @param publicId     The public identifier of the entity.
 463      * @param literalSystemId     The system identifier of the entity.
 464      * @param baseSystemId The base system identifier of the entity.
 465      *                     This is the system identifier of the entity
 466      *                     where <em>the entity being added</em> and
 467      *                     is used to expand the system identifier when
 468      *                     the system identifier is a relative URI.
 469      *                     When null the system identifier of the first
 470      *                     external entity on the stack is used instead.
 471      *
 472      * @see SymbolTable
 473      */
 474     public void addExternalEntity(String name,
 475             String publicId, String literalSystemId,
 476             String baseSystemId) throws IOException {
 477         if (!fEntities.containsKey(name)) {
 478             if (baseSystemId == null) {
 479                 // search for the first external entity on the stack
 480                 int size = fEntityStack.size();
 481                 if (size == 0 && fCurrentEntity != null && fCurrentEntity.entityLocation != null) {
 482                     baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId();
 483                 }
 484                 for (int i = size - 1; i >= 0 ; i--) {
 485                     Entity.ScannedEntity externalEntity =
 486                             (Entity.ScannedEntity)fEntityStack.elementAt(i);
 487                     if (externalEntity.entityLocation != null && externalEntity.entityLocation.getExpandedSystemId() != null) {
 488                         baseSystemId = externalEntity.entityLocation.getExpandedSystemId();
 489                         break;
 490                     }
 491                 }
 492             }
 493             Entity entity = new Entity.ExternalEntity(name,
 494                     new XMLEntityDescriptionImpl(name, publicId, literalSystemId, baseSystemId,
 495                     expandSystemId(literalSystemId, baseSystemId, false)), null, fInExternalSubset);
 496             fEntities.put(name, entity);
 497         } else{
 498             if(fWarnDuplicateEntityDef){
 499                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 500                         "MSG_DUPLICATE_ENTITY_DEFINITION",
 501                         new Object[]{ name },
 502                         XMLErrorReporter.SEVERITY_WARNING );
 503             }
 504         }
 505 
 506     } // addExternalEntity(String,String,String,String)
 507 
 508 
 509     /**
 510      * Adds an unparsed entity declaration.
 511      * <p>
 512      * <strong>Note:</strong> This method ignores subsequent entity
 513      * declarations.
 514      * <p>
 515      * <strong>Note:</strong> The name should be a unique symbol. The
 516      * SymbolTable can be used for this purpose.
 517      *
 518      * @param name     The name of the entity.
 519      * @param publicId The public identifier of the entity.
 520      * @param systemId The system identifier of the entity.
 521      * @param notation The name of the notation.
 522      *
 523      * @see SymbolTable
 524      */
 525     public void addUnparsedEntity(String name,
 526             String publicId, String systemId,
 527             String baseSystemId, String notation) {
 528         if (!fEntities.containsKey(name)) {
 529             Entity.ExternalEntity entity = new Entity.ExternalEntity(name,
 530                     new XMLEntityDescriptionImpl(name, publicId, systemId, baseSystemId, null),
 531                     notation, fInExternalSubset);
 532             fEntities.put(name, entity);
 533         } else{
 534             if(fWarnDuplicateEntityDef){
 535                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 536                         "MSG_DUPLICATE_ENTITY_DEFINITION",
 537                         new Object[]{ name },
 538                         XMLErrorReporter.SEVERITY_WARNING );
 539             }
 540         }
 541     } // addUnparsedEntity(String,String,String,String)
 542 
 543 
 544     /** get the entity storage object from entity manager */
 545     public XMLEntityStorage getEntityStore(){
 546         return fEntityStorage ;
 547     }
 548 
 549     /** return the entity responsible for reading the entity */
 550     public XMLEntityScanner getEntityScanner(){
 551         if(fEntityScanner == null) {
 552             // default to 1.0
 553             if(fXML10EntityScanner == null) {
 554                 fXML10EntityScanner = new XMLEntityScanner();
 555             }
 556             fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter);
 557             fEntityScanner = fXML10EntityScanner;
 558         }
 559         return fEntityScanner;
 560 
 561     }
 562 
 563     public void setScannerVersion(short version) {
 564 
 565         if(version == Constants.XML_VERSION_1_0) {
 566             if(fXML10EntityScanner == null) {
 567                 fXML10EntityScanner = new XMLEntityScanner();
 568             }
 569             fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter);
 570             fEntityScanner = fXML10EntityScanner;
 571             fEntityScanner.setCurrentEntity(fCurrentEntity);
 572         } else {
 573             if(fXML11EntityScanner == null) {
 574                 fXML11EntityScanner = new XML11EntityScanner();
 575             }
 576             fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter);
 577             fEntityScanner = fXML11EntityScanner;
 578             fEntityScanner.setCurrentEntity(fCurrentEntity);
 579         }
 580 
 581     }
 582 
 583     /**
 584      * This method uses the passed-in XMLInputSource to make
 585      * fCurrentEntity usable for reading.
 586      *
 587      * @param reference flag to indicate whether the entity is an Entity Reference.
     * @param name  name of the entity (XML if it's the document entity)
 589      * @param xmlInputSource    the input source, with sufficient information
 590      *      to begin scanning characters.
 591      * @param literal        True if this entity is started within a
 592      *                       literal value.
 593      * @param isExternal    whether this entity should be treated as an internal or external entity.
     * @throws IOException  if anything can't be read
     * @throws XNIException if any parser-specific error occurs
 596      * @return the encoding of the new entity or null if a character stream was employed
 597      */
 598     public String setupCurrentEntity(boolean reference, String name, XMLInputSource xmlInputSource,
 599             boolean literal, boolean isExternal)
 600             throws IOException, XNIException {
 601         // get information
 602 
 603         final String publicId = xmlInputSource.getPublicId();
 604         String literalSystemId = xmlInputSource.getSystemId();
 605         String baseSystemId = xmlInputSource.getBaseSystemId();
 606         String encoding = xmlInputSource.getEncoding();
 607         final boolean encodingExternallySpecified = (encoding != null);
 608         Boolean isBigEndian = null;
 609 
 610         // create reader
 611         InputStream stream = null;
 612         Reader reader = xmlInputSource.getCharacterStream();
 613 
 614         // First chance checking strict URI
 615         String expandedSystemId = expandSystemId(literalSystemId, baseSystemId, fStrictURI);
 616         if (baseSystemId == null) {
 617             baseSystemId = expandedSystemId;
 618         }
 619         if (reader == null) {
 620             stream = xmlInputSource.getByteStream();
 621             if (stream == null) {
 622                 URL location = new URL(expandedSystemId);
 623                 URLConnection connect = location.openConnection();
 624                 if (!(connect instanceof HttpURLConnection)) {
 625                     stream = connect.getInputStream();
 626                 }
 627                 else {
 628                     boolean followRedirects = true;
 629 
 630                     // setup URLConnection if we have an HTTPInputSource
 631                     if (xmlInputSource instanceof HTTPInputSource) {
 632                         final HttpURLConnection urlConnection = (HttpURLConnection) connect;
 633                         final HTTPInputSource httpInputSource = (HTTPInputSource) xmlInputSource;
 634 
 635                         // set request properties
 636                         Iterator propIter = httpInputSource.getHTTPRequestProperties();
 637                         while (propIter.hasNext()) {
 638                             Map.Entry entry = (Map.Entry) propIter.next();
 639                             urlConnection.setRequestProperty((String) entry.getKey(), (String) entry.getValue());
 640                         }
 641 
 642                         // set preference for redirection
 643                         followRedirects = httpInputSource.getFollowHTTPRedirects();
 644                         if (!followRedirects) {
 645                             urlConnection.setInstanceFollowRedirects(followRedirects);
 646                         }
 647                     }
 648 
 649                     stream = connect.getInputStream();
 650 
 651                     // REVISIT: If the URLConnection has external encoding
 652                     // information, we should be reading it here. It's located
 653                     // in the charset parameter of Content-Type. -- mrglavas
 654 
 655                     if (followRedirects) {
 656                         String redirect = connect.getURL().toString();
 657                         // E43: Check if the URL was redirected, and then
 658                         // update literal and expanded system IDs if needed.
 659                         if (!redirect.equals(expandedSystemId)) {
 660                             literalSystemId = redirect;
 661                             expandedSystemId = redirect;
 662                         }
 663                     }
 664                 }
 665             }
 666 
 667             // wrap this stream in RewindableInputStream
 668             stream = new RewindableInputStream(stream);
 669 
 670             // perform auto-detect of encoding if necessary
 671             if (encoding == null) {
 672                 // read first four bytes and determine encoding
 673                 final byte[] b4 = new byte[4];
 674                 int count = 0;
 675                 for (; count<4; count++ ) {
 676                     b4[count] = (byte)stream.read();
 677                 }
 678                 if (count == 4) {
 679                     Object [] encodingDesc = getEncodingName(b4, count);
 680                     encoding = (String)(encodingDesc[0]);
 681                     isBigEndian = (Boolean)(encodingDesc[1]);
 682 
 683                     stream.reset();
 684                     // Special case UTF-8 files with BOM created by Microsoft
 685                     // tools. It's more efficient to consume the BOM than make
 686                     // the reader perform extra checks. -Ac
 687                     if (count > 2 && encoding.equals("UTF-8")) {
 688                         int b0 = b4[0] & 0xFF;
 689                         int b1 = b4[1] & 0xFF;
 690                         int b2 = b4[2] & 0xFF;
 691                         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
 692                             // ignore first three bytes...
 693                             stream.skip(3);
 694                         }
 695                     }
 696                     reader = createReader(stream, encoding, isBigEndian);
 697                 } else {
 698                     reader = createReader(stream, encoding, isBigEndian);
 699                 }
 700             }
 701 
 702             // use specified encoding
 703             else {
 704                 encoding = encoding.toUpperCase(Locale.ENGLISH);
 705 
 706                 // If encoding is UTF-8, consume BOM if one is present.
 707                 if (encoding.equals("UTF-8")) {
 708                     final int[] b3 = new int[3];
 709                     int count = 0;
 710                     for (; count < 3; ++count) {
 711                         b3[count] = stream.read();
 712                         if (b3[count] == -1)
 713                             break;
 714                     }
 715                     if (count == 3) {
 716                         if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
 717                             // First three bytes are not BOM, so reset.
 718                             stream.reset();
 719                         }
 720                     } else {
 721                         stream.reset();
 722                     }
 723                 }
 724                 // If encoding is UTF-16, we still need to read the first four bytes
 725                 // in order to discover the byte order.
 726                 else if (encoding.equals("UTF-16")) {
 727                     final int[] b4 = new int[4];
 728                     int count = 0;
 729                     for (; count < 4; ++count) {
 730                         b4[count] = stream.read();
 731                         if (b4[count] == -1)
 732                             break;
 733                     }
 734                     stream.reset();
 735 
 736                     String utf16Encoding = "UTF-16";
 737                     if (count >= 2) {
 738                         final int b0 = b4[0];
 739                         final int b1 = b4[1];
 740                         if (b0 == 0xFE && b1 == 0xFF) {
 741                             // UTF-16, big-endian
 742                             utf16Encoding = "UTF-16BE";
 743                             isBigEndian = Boolean.TRUE;
 744                         }
 745                         else if (b0 == 0xFF && b1 == 0xFE) {
 746                             // UTF-16, little-endian
 747                             utf16Encoding = "UTF-16LE";
 748                             isBigEndian = Boolean.FALSE;
 749                         }
 750                         else if (count == 4) {
 751                             final int b2 = b4[2];
 752                             final int b3 = b4[3];
 753                             if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
 754                                 // UTF-16, big-endian, no BOM
 755                                 utf16Encoding = "UTF-16BE";
 756                                 isBigEndian = Boolean.TRUE;
 757                             }
 758                             if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
 759                                 // UTF-16, little-endian, no BOM
 760                                 utf16Encoding = "UTF-16LE";
 761                                 isBigEndian = Boolean.FALSE;
 762                             }
 763                         }
 764                     }
 765                     reader = createReader(stream, utf16Encoding, isBigEndian);
 766                 }
 767                 // If encoding is UCS-4, we still need to read the first four bytes
 768                 // in order to discover the byte order.
 769                 else if (encoding.equals("ISO-10646-UCS-4")) {
 770                     final int[] b4 = new int[4];
 771                     int count = 0;
 772                     for (; count < 4; ++count) {
 773                         b4[count] = stream.read();
 774                         if (b4[count] == -1)
 775                             break;
 776                     }
 777                     stream.reset();
 778 
 779                     // Ignore unusual octet order for now.
 780                     if (count == 4) {
 781                         // UCS-4, big endian (1234)
 782                         if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) {
 783                             isBigEndian = Boolean.TRUE;
 784                         }
 785                         // UCS-4, little endian (1234)
 786                         else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) {
 787                             isBigEndian = Boolean.FALSE;
 788                         }
 789                     }
 790                 }
 791                 // If encoding is UCS-2, we still need to read the first four bytes
 792                 // in order to discover the byte order.
 793                 else if (encoding.equals("ISO-10646-UCS-2")) {
 794                     final int[] b4 = new int[4];
 795                     int count = 0;
 796                     for (; count < 4; ++count) {
 797                         b4[count] = stream.read();
 798                         if (b4[count] == -1)
 799                             break;
 800                     }
 801                     stream.reset();
 802 
 803                     if (count == 4) {
 804                         // UCS-2, big endian
 805                         if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) {
 806                             isBigEndian = Boolean.TRUE;
 807                         }
 808                         // UCS-2, little endian
 809                         else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) {
 810                             isBigEndian = Boolean.FALSE;
 811                         }
 812                     }
 813                 }
 814 
 815                 reader = createReader(stream, encoding, isBigEndian);
 816             }
 817 
 818             // read one character at a time so we don't jump too far
 819             // ahead, converting characters from the byte stream in
 820             // the wrong encoding
 821             if (DEBUG_ENCODINGS) {
 822                 System.out.println("$$$ no longer wrapping reader in OneCharReader");
 823             }
 824             //reader = new OneCharReader(reader);
 825         }
 826 
 827         // We've seen a new Reader.
 828         // Push it on the stack so we can close it later.
 829         //fOwnReaders.add(reader);
 830 
 831         // push entity on stack
 832         if (fCurrentEntity != null) {
 833             fEntityStack.push(fCurrentEntity);
 834         }
 835 
 836         // create entity
 837         /* if encoding is specified externally, 'encoding' information present
 838          * in the prolog of the XML document is not considered. Hence, prolog can
 839          * be read in Chunks of data instead of byte by byte.
 840          */
 841         fCurrentEntity = new Entity.ScannedEntity(reference, name,
 842                 new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandedSystemId),
 843                 stream, reader, encoding, literal, encodingExternallySpecified, isExternal);
 844         fCurrentEntity.setEncodingExternallySpecified(encodingExternallySpecified);
 845         fEntityScanner.setCurrentEntity(fCurrentEntity);
 846         fResourceIdentifier.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId);
 847         if (fLimitAnalyzer != null) {
 848             fLimitAnalyzer.startEntity(name);
 849         }
 850         return encoding;
 851     } //setupCurrentEntity(String, XMLInputSource, boolean, boolean):  String
 852 
 853 
 854     /**
 855      * Checks whether an entity given by name is external.
 856      *
 857      * @param entityName The name of the entity to check.
 858      * @return True if the entity is external, false otherwise
 859      * (including when the entity is not declared).
 860      */
 861     public boolean isExternalEntity(String entityName) {
 862 
 863         Entity entity = fEntities.get(entityName);
 864         if (entity == null) {
 865             return false;
 866         }
 867         return entity.isExternal();
 868     }
 869 
 870     /**
     * Checks whether the declaration of an entity given by name is
     * in the external subset.
 873      *
 874      * @param entityName The name of the entity to check.
 875      * @return True if the entity was declared in the external subset, false otherwise
 876      *           (including when the entity is not declared).
 877      */
 878     public boolean isEntityDeclInExternalSubset(String entityName) {
 879 
 880         Entity entity = fEntities.get(entityName);
 881         if (entity == null) {
 882             return false;
 883         }
 884         return entity.isEntityDeclInExternalSubset();
 885     }
 886 
 887 
 888 
 889     //
 890     // Public methods
 891     //
 892 
 893     /**
 894      * Sets whether the document entity is standalone.
 895      *
 896      * @param standalone True if document entity is standalone.
 897      */
 898     public void setStandalone(boolean standalone) {
 899         fStandalone = standalone;
 900     }
 901     // setStandalone(boolean)
 902 
 903     /** Returns true if the document entity is standalone. */
 904     public boolean isStandalone() {
 905         return fStandalone;
 906     }  //isStandalone():boolean
 907 
 908     public boolean isDeclaredEntity(String entityName) {
 909 
 910         Entity entity = fEntities.get(entityName);
 911         return entity != null;
 912     }
 913 
 914     public boolean isUnparsedEntity(String entityName) {
 915 
 916         Entity entity = fEntities.get(entityName);
 917         if (entity == null) {
 918             return false;
 919         }
 920         return entity.isUnparsed();
 921     }
 922 
 923 
 924 
    // This simply returns the fResourceIdentifier object.
    // It should only be used with caution by callers that
    // carefully manage the entity manager's behaviour, so that
    // it doesn't return meaningless or misleading data.
    // @return  a reference to the current fResourceIdentifier object
 930     public XMLResourceIdentifier getCurrentResourceIdentifier() {
 931         return fResourceIdentifier;
 932     }
 933 
 934     /**
 935      * Sets the entity handler. When an entity starts and ends, the
 936      * entity handler is notified of the change.
 937      *
 938      * @param entityHandler The new entity handler.
 939      */
 940 
 941     public void setEntityHandler(com.sun.org.apache.xerces.internal.impl.XMLEntityHandler entityHandler) {
 942         fEntityHandler = (XMLEntityHandler) entityHandler;
 943     } // setEntityHandler(XMLEntityHandler)
 944 
 945     //this function returns StaxXMLInputSource
 946     public StaxXMLInputSource resolveEntityAsPerStax(XMLResourceIdentifier resourceIdentifier) throws java.io.IOException{
 947 
 948         if(resourceIdentifier == null ) return null;
 949 
 950         String publicId = resourceIdentifier.getPublicId();
 951         String literalSystemId = resourceIdentifier.getLiteralSystemId();
 952         String baseSystemId = resourceIdentifier.getBaseSystemId();
 953         String expandedSystemId = resourceIdentifier.getExpandedSystemId();
 954         // if no base systemId given, assume that it's relative
 955         // to the systemId of the current scanned entity
 956         // Sometimes the system id is not (properly) expanded.
 957         // We need to expand the system id if:
 958         // a. the expanded one was null; or
 959         // b. the base system id was null, but becomes non-null from the current entity.
 960         boolean needExpand = (expandedSystemId == null);
 961         // REVISIT:  why would the baseSystemId ever be null?  if we
 962         // didn't have to make this check we wouldn't have to reuse the
 963         // fXMLResourceIdentifier object...
 964         if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) {
 965             baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId();
 966             if (baseSystemId != null)
 967                 needExpand = true;
 968         }
 969         if (needExpand)
 970             expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false);
 971 
 972         // give the entity resolver a chance
 973         StaxXMLInputSource staxInputSource = null;
 974         XMLInputSource xmlInputSource = null;
 975 
 976         XMLResourceIdentifierImpl ri = null;
 977 
 978         if (resourceIdentifier instanceof XMLResourceIdentifierImpl) {
 979             ri = (XMLResourceIdentifierImpl)resourceIdentifier;
 980         } else {
 981             fResourceIdentifier.clear();
 982             ri = fResourceIdentifier;
 983         }
 984         ri.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId);
 985         if(DEBUG_RESOLVER){
 986             System.out.println("BEFORE Calling resolveEntity") ;
 987         }
 988 
 989         fISCreatedByResolver = false;
 990         //either of Stax or Xerces would be null
 991         if(fStaxEntityResolver != null){
 992             staxInputSource = fStaxEntityResolver.resolveEntity(ri);
 993             if(staxInputSource != null) {
 994                 fISCreatedByResolver = true;
 995             }
 996         }
 997 
 998         if(fEntityResolver != null){
 999             xmlInputSource = fEntityResolver.resolveEntity(ri);
1000             if(xmlInputSource != null) {
1001                 fISCreatedByResolver = true;
1002             }
1003         }
1004 
1005         if(xmlInputSource != null){
1006             //wrap this XMLInputSource to StaxInputSource
1007             staxInputSource = new StaxXMLInputSource(xmlInputSource, fISCreatedByResolver);
1008         }
1009 
1010         // do default resolution
1011         //this works for both stax & Xerces, if staxInputSource is null, it means parser need to revert to default resolution
1012         if (staxInputSource == null) {
1013             // REVISIT: when systemId is null, I think we should return null.
1014             //          is this the right solution? -SG
1015             //if (systemId != null)
1016             staxInputSource = new StaxXMLInputSource(new XMLInputSource(publicId, literalSystemId, baseSystemId));
1017         }else if(staxInputSource.hasXMLStreamOrXMLEventReader()){
1018             //Waiting for the clarification from EG. - nb
1019         }
1020 
1021         if (DEBUG_RESOLVER) {
1022             System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")");
1023             System.err.println(" = " + xmlInputSource);
1024         }
1025 
1026         return staxInputSource;
1027 
1028     }
1029 
1030     /**
1031      * Resolves the specified public and system identifiers. This
1032      * method first attempts to resolve the entity based on the
1033      * EntityResolver registered by the application. If no entity
1034      * resolver is registered or if the registered entity handler
1035      * is unable to resolve the entity, then default entity
1036      * resolution will occur.
1037      *
     * @param resourceIdentifier Identifies the entity to resolve through its
     *                     public identifier, literal system identifier,
     *                     base system identifier and expanded system
     *                     identifier. If the base system identifier is
     *                     missing, the expanded system identifier of the
     *                     current entity is used to expand the system
     *                     identifier when it is a relative URI.
     *
     * @return Returns an input source that wraps the resolved entity,
     *         or null if resourceIdentifier is null.
1048      *
1049      * @throws IOException  Thrown on i/o error.
1050      * @throws XNIException Thrown by entity resolver to signal an error.
1051      */
1052     public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier) throws IOException, XNIException {
1053         if(resourceIdentifier == null ) return null;
1054         String publicId = resourceIdentifier.getPublicId();
1055         String literalSystemId = resourceIdentifier.getLiteralSystemId();
1056         String baseSystemId = resourceIdentifier.getBaseSystemId();
1057         String expandedSystemId = resourceIdentifier.getExpandedSystemId();
1058         String namespace = resourceIdentifier.getNamespace();
1059 
1060         // if no base systemId given, assume that it's relative
1061         // to the systemId of the current scanned entity
1062         // Sometimes the system id is not (properly) expanded.
1063         // We need to expand the system id if:
1064         // a. the expanded one was null; or
1065         // b. the base system id was null, but becomes non-null from the current entity.
1066         boolean needExpand = (expandedSystemId == null);
1067         // REVISIT:  why would the baseSystemId ever be null?  if we
1068         // didn't have to make this check we wouldn't have to reuse the
1069         // fXMLResourceIdentifier object...
1070         if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) {
1071             baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId();
1072             if (baseSystemId != null)
1073                 needExpand = true;
1074         }
1075         if (needExpand)
1076             expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false);
1077 
1078         // give the entity resolver a chance
1079         XMLInputSource xmlInputSource = null;
1080 
1081         if (fEntityResolver != null) {
1082             resourceIdentifier.setBaseSystemId(baseSystemId);
1083             resourceIdentifier.setExpandedSystemId(expandedSystemId);
1084             xmlInputSource = fEntityResolver.resolveEntity(resourceIdentifier);
1085         }
1086 
1087         // do default resolution
1088         // REVISIT: what's the correct behavior if the user provided an entity
1089         // resolver (fEntityResolver != null), but resolveEntity doesn't return
1090         // an input source (xmlInputSource == null)?
1091         // do we do default resolution, or do we just return null? -SG
1092         if (xmlInputSource == null) {
1093             // REVISIT: when systemId is null, I think we should return null.
1094             //          is this the right solution? -SG
1095             //if (systemId != null)
1096             xmlInputSource = new XMLInputSource(publicId, literalSystemId, baseSystemId);
1097         }
1098 
1099         if (DEBUG_RESOLVER) {
1100             System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")");
1101             System.err.println(" = " + xmlInputSource);
1102         }
1103 
1104         return xmlInputSource;
1105 
1106     } // resolveEntity(XMLResourceIdentifier):XMLInputSource
1107 
1108     /**
1109      * Starts a named entity.
1110      *
1111      * @param reference flag to indicate whether the entity is an Entity Reference.
1112      * @param entityName The name of the entity to start.
1113      * @param literal    True if this entity is started within a literal
1114      *                   value.
1115      *
1116      * @throws IOException  Thrown on i/o error.
1117      * @throws XNIException Thrown by entity handler to signal an error.
1118      */
1119     public void startEntity(boolean reference, String entityName, boolean literal)
1120     throws IOException, XNIException {
1121 
1122         // was entity declared?
1123         Entity entity = fEntityStorage.getEntity(entityName);
1124         if (entity == null) {
1125             if (fEntityHandler != null) {
1126                 String encoding = null;
1127                 fResourceIdentifier.clear();
1128                 fEntityAugs.removeAllItems();
1129                 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
1130                 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs);
1131                 fEntityAugs.removeAllItems();
1132                 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
1133                 fEntityHandler.endEntity(entityName, fEntityAugs);
1134             }
1135             return;
1136         }
1137 
1138         // should we skip external entities?
1139         boolean external = entity.isExternal();
1140         Entity.ExternalEntity externalEntity = null;
1141         String extLitSysId = null, extBaseSysId = null, expandedSystemId = null;
1142         if (external) {
1143             externalEntity = (Entity.ExternalEntity)entity;
1144             extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null);
1145             extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null);
1146             expandedSystemId = expandSystemId(extLitSysId, extBaseSysId);
1147             boolean unparsed = entity.isUnparsed();
1148             boolean parameter = entityName.startsWith("%");
1149             boolean general = !parameter;
1150             if (unparsed || (general && !fExternalGeneralEntities) ||
1151                     (parameter && !fExternalParameterEntities) ||
1152                     !fSupportDTD || !fSupportExternalEntities) {
1153 
1154                 if (fEntityHandler != null) {
1155                     fResourceIdentifier.clear();
1156                     final String encoding = null;
1157                     fResourceIdentifier.setValues(
1158                             (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null),
1159                             extLitSysId, extBaseSysId, expandedSystemId);
1160                     fEntityAugs.removeAllItems();
1161                     fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
1162                     fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs);
1163                     fEntityAugs.removeAllItems();
1164                     fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
1165                     fEntityHandler.endEntity(entityName, fEntityAugs);
1166                 }
1167                 return;
1168             }
1169         }
1170 
1171         // is entity recursive?
1172         int size = fEntityStack.size();
1173         for (int i = size; i >= 0; i--) {
1174             Entity activeEntity = i == size
1175                     ? fCurrentEntity
1176                     : (Entity)fEntityStack.elementAt(i);
1177             if (activeEntity.name == entityName) {
1178                 String path = entityName;
1179                 for (int j = i + 1; j < size; j++) {
1180                     activeEntity = (Entity)fEntityStack.elementAt(j);
1181                     path = path + " -> " + activeEntity.name;
1182                 }
1183                 path = path + " -> " + fCurrentEntity.name;
1184                 path = path + " -> " + entityName;
1185                 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
1186                         "RecursiveReference",
1187                         new Object[] { entityName, path },
1188                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1189 
1190                         if (fEntityHandler != null) {
1191                             fResourceIdentifier.clear();
1192                             final String encoding = null;
1193                             if (external) {
1194                                 fResourceIdentifier.setValues(
1195                                         (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null),
1196                                         extLitSysId, extBaseSysId, expandedSystemId);
1197                             }
1198                             fEntityAugs.removeAllItems();
1199                             fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
1200                             fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs);
1201                             fEntityAugs.removeAllItems();
1202                             fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
1203                             fEntityHandler.endEntity(entityName, fEntityAugs);
1204                         }
1205 
1206                         return;
1207             }
1208         }
1209 
1210         // resolve external entity
1211         StaxXMLInputSource staxInputSource = null;
1212         XMLInputSource xmlInputSource = null ;
1213 
1214         if (external) {
1215             staxInputSource = resolveEntityAsPerStax(externalEntity.entityLocation);
1216             /** xxx:  Waiting from the EG
1217              * //simply return if there was entity resolver registered and application
1218              * //returns either XMLStreamReader or XMLEventReader.
1219              * if(staxInputSource.hasXMLStreamOrXMLEventReader()) return ;
1220              */
1221             xmlInputSource = staxInputSource.getXMLInputSource() ;
1222             if (!fISCreatedByResolver) {
1223                 //let the not-LoadExternalDTD or not-SupportDTD process to handle the situation
1224                 if (fLoadExternalDTD) {
1225                     String accessError = SecuritySupport.checkAccess(expandedSystemId, fAccessExternalDTD, Constants.ACCESS_EXTERNAL_ALL);
1226                     if (accessError != null) {
1227                         fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
1228                                 "AccessExternalEntity",
1229                                 new Object[] { SecuritySupport.sanitizePath(expandedSystemId), accessError },
1230                                 XMLErrorReporter.SEVERITY_FATAL_ERROR);
1231                     }
1232                 }
1233             }
1234         }
1235         // wrap internal entity
1236         else {
1237             Entity.InternalEntity internalEntity = (Entity.InternalEntity)entity;
1238             Reader reader = new StringReader(internalEntity.text);
1239             xmlInputSource = new XMLInputSource(null, null, null, reader, null);
1240         }
1241 
1242         // start the entity
1243         startEntity(reference, entityName, xmlInputSource, literal, external);
1244 
1245     } // startEntity(String,boolean)
1246 
1247     /**
1248      * Starts the document entity. The document entity has the "[xml]"
1249      * pseudo-name.
1250      *
1251      * @param xmlInputSource The input source of the document entity.
1252      *
1253      * @throws IOException  Thrown on i/o error.
1254      * @throws XNIException Thrown by entity handler to signal an error.
1255      */
1256     public void startDocumentEntity(XMLInputSource xmlInputSource)
1257     throws IOException, XNIException {
1258         startEntity(false, XMLEntity, xmlInputSource, false, true);
1259     } // startDocumentEntity(XMLInputSource)
1260 
1261     //xxx these methods are not required.
1262     /**
1263      * Starts the DTD entity. The DTD entity has the "[dtd]"
1264      * pseudo-name.
1265      *
1266      * @param xmlInputSource The input source of the DTD entity.
1267      *
1268      * @throws IOException  Thrown on i/o error.
1269      * @throws XNIException Thrown by entity handler to signal an error.
1270      */
1271     public void startDTDEntity(XMLInputSource xmlInputSource)
1272     throws IOException, XNIException {
1273         startEntity(false, DTDEntity, xmlInputSource, false, true);
1274     } // startDTDEntity(XMLInputSource)
1275 
1276     // indicate start of external subset so that
1277     // location of entity decls can be tracked
1278     public void startExternalSubset() {
1279         fInExternalSubset = true;
1280     }
1281 
1282     public void endExternalSubset() {
1283         fInExternalSubset = false;
1284     }
1285 
1286     /**
1287      * Starts an entity.
1288      * <p>
1289      * This method can be used to insert an application defined XML
1290      * entity stream into the parsing stream.
1291      *
1292      * @param reference flag to indicate whether the entity is an Entity Reference.
1293      * @param name           The name of the entity.
1294      * @param xmlInputSource The input source of the entity.
1295      * @param literal        True if this entity is started within a
1296      *                       literal value.
1297      * @param isExternal    whether this entity should be treated as an internal or external entity.
1298      *
1299      * @throws IOException  Thrown on i/o error.
1300      * @throws XNIException Thrown by entity handler to signal an error.
1301      */
1302     public void startEntity(boolean reference, String name,
1303             XMLInputSource xmlInputSource,
1304             boolean literal, boolean isExternal)
1305             throws IOException, XNIException {
1306 
1307         String encoding = setupCurrentEntity(reference, name, xmlInputSource, literal, isExternal);
1308 
1309         //when entity expansion limit is set by the Application, we need to
1310         //check for the entity expansion limit set by the parser, if number of entity
1311         //expansions exceeds the entity expansion limit, parser will throw fatal error.
1312         // Note that this represents the nesting level of open entities.
1313         fEntityExpansionCount++;
1314         if(fLimitAnalyzer != null) {
1315            fLimitAnalyzer.addValue(entityExpansionIndex, name, 1);
1316         }
1317         if( fSecurityManager != null && fSecurityManager.isOverLimit(entityExpansionIndex, fLimitAnalyzer)){
1318             fSecurityManager.debugPrint(fLimitAnalyzer);
1319             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,"EntityExpansionLimit",
1320                     new Object[]{fSecurityManager.getLimitValueByIndex(entityExpansionIndex)},
1321                                              XMLErrorReporter.SEVERITY_FATAL_ERROR );
1322             // is there anything better to do than reset the counter?
1323             // at least one can envision debugging applications where this might
1324             // be useful...
1325             fEntityExpansionCount = 0;
1326         }
1327 
1328         // call handler
1329         if (fEntityHandler != null) {
1330             fEntityHandler.startEntity(name, fResourceIdentifier, encoding, null);
1331         }
1332 
1333     } // startEntity(String,XMLInputSource)
1334 
1335     /**
1336      * Return the current entity being scanned. Current entity is SET using startEntity function.
1337      * @return Entity.ScannedEntity
1338      */
1339 
1340     public Entity.ScannedEntity getCurrentEntity(){
1341         return fCurrentEntity ;
1342     }
1343 
1344     /**
1345      * Return the top level entity handled by this manager, or null
1346      * if no entity was added.
1347      */
1348     public Entity.ScannedEntity getTopLevelEntity() {
1349         return (Entity.ScannedEntity)
1350             (fEntityStack.empty() ? null : fEntityStack.elementAt(0));
1351     }
1352 
1353 
    /**
     * Close all opened InputStreams and Readers opened by this parser.
     * Currently a no-op: the reader of an entity is closed in endEntity().
     */
    public void closeReaders() {
        /** Intentionally empty: this call actually does nothing, readers are
         * closed in the endEntity method through the current entity.
         * The change seems to have happened during the jdk6 development with the
         * addition of StAX
        **/
    }
1364 
1365     public void endEntity() throws IOException, XNIException {
1366 
1367         // call handler
1368         if (DEBUG_BUFFER) {
1369             System.out.print("(endEntity: ");
1370             print();
1371             System.out.println();
1372         }
1373         //pop the entity from the stack
1374         Entity.ScannedEntity entity = fEntityStack.size() > 0 ? (Entity.ScannedEntity)fEntityStack.pop() : null ;
1375 
1376         /** need to close the reader first since the program can end
1377          *  prematurely (e.g. fEntityHandler.endEntity may throw exception)
1378          *  leaving the reader open
1379          */
1380         //close the reader
1381         if(fCurrentEntity != null){
1382             //close the reader
1383             try{
1384                 if (fLimitAnalyzer != null) {
1385                     fLimitAnalyzer.endEntity(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fCurrentEntity.name);
1386                     if (fCurrentEntity.name.equals("[xml]")) {
1387                         fSecurityManager.debugPrint(fLimitAnalyzer);
1388                     }
1389                 }
1390                 fCurrentEntity.close();
1391             }catch(IOException ex){
1392                 throw new XNIException(ex);
1393             }
1394         }
1395 
1396         if (fEntityHandler != null) {
1397             //so this is the last opened entity, signal it to current fEntityHandler using Augmentation
1398             if(entity == null){
1399                 fEntityAugs.removeAllItems();
1400                 fEntityAugs.putItem(Constants.LAST_ENTITY, Boolean.TRUE);
1401                 fEntityHandler.endEntity(fCurrentEntity.name, fEntityAugs);
1402                 fEntityAugs.removeAllItems();
1403             }else{
1404                 fEntityHandler.endEntity(fCurrentEntity.name, null);
1405             }
1406         }
1407         //check if it is a document entity
1408         boolean documentEntity = fCurrentEntity.name == XMLEntity;
1409 
1410         //set popped entity as current entity
1411         fCurrentEntity = entity;
1412         fEntityScanner.setCurrentEntity(fCurrentEntity);
1413 
1414         //check if there are any entity left in the stack -- if there are
1415         //no entries EOF has been reached.
1416         // throw exception when it is the last entity but it is not a document entity
1417 
1418         if(fCurrentEntity == null & !documentEntity){
1419             throw new EOFException() ;
1420         }
1421 
1422         if (DEBUG_BUFFER) {
1423             System.out.print(")endEntity: ");
1424             print();
1425             System.out.println();
1426         }
1427 
1428     } // endEntity()
1429 
1430 
1431     //
1432     // XMLComponent methods
1433     //
1434     public void reset(PropertyManager propertyManager){
1435         // xerces properties
1436         fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY);
1437         fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY);
1438         try {
1439             fStaxEntityResolver = (StaxEntityResolverWrapper)propertyManager.getProperty(STAX_ENTITY_RESOLVER);
1440         } catch (XMLConfigurationException e) {
1441             fStaxEntityResolver = null;
1442         }
1443 
1444         fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue();
1445         fReplaceEntityReferences = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES)).booleanValue();
1446         fSupportExternalEntities = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES)).booleanValue();
1447 
1448         // Zephyr feature ignore-external-dtd is the opposite of Xerces' load-external-dtd
1449         fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue();
1450 
1451         // JAXP 1.5 feature
1452         XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER);
1453         fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);
1454 
1455         fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER);
1456 
1457         fLimitAnalyzer = new XMLLimitAnalyzer();
1458         //reset fEntityStorage
1459         fEntityStorage.reset(propertyManager);
1460         //reset XMLEntityReaderImpl
1461         fEntityScanner.reset(propertyManager);
1462 
1463         // initialize state
1464         //fStandalone = false;
1465         fEntities.clear();
1466         fEntityStack.removeAllElements();
1467         fCurrentEntity = null;
1468         fValidation = false;
1469         fExternalGeneralEntities = true;
1470         fExternalParameterEntities = true;
1471         fAllowJavaEncodings = true ;
1472     }
1473 
    /**
     * Resets the component. The component can query the component manager
     * about any features and properties that affect the operation of the
     * component.
     *
     * @param componentManager The component manager.
     *
     * @throws XMLConfigurationException Thrown by component on initialization
     *                      error. For example, if a feature or property is
     *                      required for the operation of the component, the
     *                      component manager may throw this exception when
     *                      the feature or property is not recognized or not
     *                      supported.
     */
1488     public void reset(XMLComponentManager componentManager)
1489     throws XMLConfigurationException {
1490 
1491         boolean parser_settings = componentManager.getFeature(PARSER_SETTINGS, true);
1492 
1493         if (!parser_settings) {
1494             // parser settings have not been changed
1495             reset();
1496             if(fEntityScanner != null){
1497                 fEntityScanner.reset(componentManager);
1498             }
1499             if(fEntityStorage != null){
1500                 fEntityStorage.reset(componentManager);
1501             }
1502             return;
1503         }
1504 
1505         // sax features
1506         fValidation = componentManager.getFeature(VALIDATION, false);
1507         fExternalGeneralEntities = componentManager.getFeature(EXTERNAL_GENERAL_ENTITIES, true);
1508         fExternalParameterEntities = componentManager.getFeature(EXTERNAL_PARAMETER_ENTITIES, true);
1509 
1510         // xerces features
1511         fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);
1512         fWarnDuplicateEntityDef = componentManager.getFeature(WARN_ON_DUPLICATE_ENTITYDEF, false);
1513         fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false);
1514         fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true);
1515 
1516         // xerces properties
1517         fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
1518         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
1519         fEntityResolver = (XMLEntityResolver)componentManager.getProperty(ENTITY_RESOLVER, null);
1520         fStaxEntityResolver = (StaxEntityResolverWrapper)componentManager.getProperty(STAX_ENTITY_RESOLVER, null);
1521         fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null);
1522         fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER, null);
1523         entityExpansionIndex = fSecurityManager.getIndex(Constants.JDK_ENTITY_EXPANSION_LIMIT);
1524 
1525         //StAX Property
1526         fSupportDTD = true;
1527         fReplaceEntityReferences = true;
1528         fSupportExternalEntities = true;
1529 
1530         // JAXP 1.5 feature
1531         XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null);
1532         if (spm == null) {
1533             spm = new XMLSecurityPropertyManager();
1534         }
1535         fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);
1536 
1537         //reset general state
1538         reset();
1539 
1540         fEntityScanner.reset(componentManager);
1541         fEntityStorage.reset(componentManager);
1542 
1543     } // reset(XMLComponentManager)
1544 
1545     // reset general state.  Should not be called other than by
1546     // a class acting as a component manager but not
1547     // implementing that interface for whatever reason.
1548     public void reset() {
1549         fLimitAnalyzer = new XMLLimitAnalyzer();
1550         // initialize state
1551         fStandalone = false;
1552         fEntities.clear();
1553         fEntityStack.removeAllElements();
1554         fEntityExpansionCount = 0;
1555 
1556         fCurrentEntity = null;
1557         // reset scanner
1558         if(fXML10EntityScanner != null){
1559             fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter);
1560         }
1561         if(fXML11EntityScanner != null) {
1562             fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter);
1563         }
1564 
1565         // DEBUG
1566         if (DEBUG_ENTITIES) {
1567             addInternalEntity("text", "Hello, World.");
1568             addInternalEntity("empty-element", "<foo/>");
1569             addInternalEntity("balanced-element", "<foo></foo>");
1570             addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>");
1571             addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>");
1572             addInternalEntity("unbalanced-entity", "<foo>");
1573             addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>");
1574             addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>");
1575             addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>");
1576             try {
1577                 addExternalEntity("external-text", null, "external-text.ent", "test/external-text.xml");
1578                 addExternalEntity("external-balanced-element", null, "external-balanced-element.ent", "test/external-balanced-element.xml");
1579                 addExternalEntity("one", null, "ent/one.ent", "test/external-entity.xml");
1580                 addExternalEntity("two", null, "ent/two.ent", "test/ent/one.xml");
1581             }
1582             catch (IOException ex) {
1583                 // should never happen
1584             }
1585         }
1586 
1587         fEntityHandler = null;
1588 
1589         // reset scanner
1590         //if(fEntityScanner!=null)
1591           //  fEntityScanner.reset(fSymbolTable, this,fErrorReporter);
1592 
1593     }
1594     /**
1595      * Returns a list of feature identifiers that are recognized by
1596      * this component. This method may return null if no features
1597      * are recognized by this component.
1598      */
1599     public String[] getRecognizedFeatures() {
1600         return (String[])(RECOGNIZED_FEATURES.clone());
1601     } // getRecognizedFeatures():String[]
1602 
    /**
     * Sets the state of a feature. This method is called by the component
     * manager any time after reset when a feature changes state.
     * <p>
     * <strong>Note:</strong> Components should silently ignore features
     * that do not affect the operation of the component.
     *
     * @param featureId The feature identifier.
     * @param state     The state of the feature.
     *
     * @throws XMLConfigurationException The component should not throw
     *                                   this exception.
     */
1618     public void setFeature(String featureId, boolean state)
1619     throws XMLConfigurationException {
1620 
1621         // xerces features
1622         if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
1623             final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length();
1624             if (suffixLength == Constants.ALLOW_JAVA_ENCODINGS_FEATURE.length() &&
1625                 featureId.endsWith(Constants.ALLOW_JAVA_ENCODINGS_FEATURE)) {
1626                 fAllowJavaEncodings = state;
1627             }
1628             if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() &&
1629                 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) {
1630                 fLoadExternalDTD = state;
1631                 return;
1632             }
1633         }
1634 
1635     } // setFeature(String,boolean)
1636 
    /**
     * Sets the value of a property. This method is called by the component
     * manager any time after reset when a property changes value.
     * <p>
     * <strong>Note:</strong> Components should silently ignore properties
     * that do not affect the operation of the component.
     * <p>
     * This method declares no checked exceptions; a value of the wrong type
     * for a recognized property fails with a ClassCastException.
     *
     * @param propertyId The property identifier.
     * @param value      The value of the property.
     */
1652     public void setProperty(String propertyId, Object value){
1653         // Xerces properties
1654         if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
1655             final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
1656 
1657             if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() &&
1658                 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
1659                 fSymbolTable = (SymbolTable)value;
1660                 return;
1661             }
1662             if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() &&
1663                 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
1664                 fErrorReporter = (XMLErrorReporter)value;
1665                 return;
1666             }
1667             if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() &&
1668                 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) {
1669                 fEntityResolver = (XMLEntityResolver)value;
1670                 return;
1671             }
1672             if (suffixLength == Constants.BUFFER_SIZE_PROPERTY.length() &&
1673                 propertyId.endsWith(Constants.BUFFER_SIZE_PROPERTY)) {
1674                 Integer bufferSize = (Integer)value;
1675                 if (bufferSize != null &&
1676                     bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) {
1677                     fBufferSize = bufferSize.intValue();
1678                     fEntityScanner.setBufferSize(fBufferSize);
1679                     fBufferPool.setExternalBufferSize(fBufferSize);
1680                 }
1681             }
1682             if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() &&
1683                 propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) {
1684                 fSecurityManager = (XMLSecurityManager)value;
1685             }
1686         }
1687 
1688         //JAXP 1.5 properties
1689         if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER))
1690         {
1691             XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value;
1692             fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);
1693         }
1694     }
1695 
    /**
     * Replaces the limit analyzer used by this entity manager.
     *
     * @param fLimitAnalyzer The limit analyzer to use from now on; it is
     *                       stored as is, without a null check.
     */
    public void setLimitAnalyzer(XMLLimitAnalyzer fLimitAnalyzer) {
        this.fLimitAnalyzer = fLimitAnalyzer;
    }
1699 
1700     /**
1701      * Returns a list of property identifiers that are recognized by
1702      * this component. This method may return null if no properties
1703      * are recognized by this component.
1704      */
1705     public String[] getRecognizedProperties() {
1706         return (String[])(RECOGNIZED_PROPERTIES.clone());
1707     } // getRecognizedProperties():String[]
1708     /**
1709      * Returns the default state for a feature, or null if this
1710      * component does not want to report a default value for this
1711      * feature.
1712      *
1713      * @param featureId The feature identifier.
1714      *
1715      * @since Xerces 2.2.0
1716      */
1717     public Boolean getFeatureDefault(String featureId) {
1718         for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
1719             if (RECOGNIZED_FEATURES[i].equals(featureId)) {
1720                 return FEATURE_DEFAULTS[i];
1721             }
1722         }
1723         return null;
1724     } // getFeatureDefault(String):Boolean
1725 
1726     /**
1727      * Returns the default state for a property, or null if this
1728      * component does not want to report a default value for this
1729      * property.
1730      *
1731      * @param propertyId The property identifier.
1732      *
1733      * @since Xerces 2.2.0
1734      */
1735     public Object getPropertyDefault(String propertyId) {
1736         for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
1737             if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
1738                 return PROPERTY_DEFAULTS[i];
1739             }
1740         }
1741         return null;
1742     } // getPropertyDefault(String):Object
1743 
1744     //
1745     // Public static methods
1746     //
1747 
    /**
     * Expands a system id, using the current value of the "user.dir"
     * property as the base, and returns the result as a URI string. This
     * is a shorthand for the two-argument overload with a null base
     * system id.
     *
     * @param systemId The systemId to be expanded.
     *
     * @return Returns the URI string representing the expanded system
     *         identifier. The given system identifier itself is returned
     *         when it is null, empty, already a URI on its own, or when it
     *         cannot be expanded.
     *
     */
    public static String expandSystemId(String systemId) {
        // no base system id is available here: delegate with a null base
        return expandSystemId(systemId, null);
    } // expandSystemId(String):String
1764 
1765     //
1766     // Public static methods
1767     //
1768 
1769     // current value of the "user.dir" property
1770     private static String gUserDir;
1771     // cached URI object for the current value of the escaped "user.dir" property stored as a URI
1772     private static URI gUserDirURI;
1773     // which ASCII characters need to be escaped
1774     private static boolean gNeedEscaping[] = new boolean[128];
1775     // the first hex character if a character needs to be escaped
1776     private static char gAfterEscaping1[] = new char[128];
1777     // the second hex character if a character needs to be escaped
1778     private static char gAfterEscaping2[] = new char[128];
1779     private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
1780                                      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
1781     // initialize the above 3 arrays
1782     static {
1783         for (int i = 0; i <= 0x1f; i++) {
1784             gNeedEscaping[i] = true;
1785             gAfterEscaping1[i] = gHexChs[i >> 4];
1786             gAfterEscaping2[i] = gHexChs[i & 0xf];
1787         }
1788         gNeedEscaping[0x7f] = true;
1789         gAfterEscaping1[0x7f] = '7';
1790         gAfterEscaping2[0x7f] = 'F';
1791         char[] escChs = {' ', '<', '>', '#', '%', '"', '{', '}',
1792                          '|', '\\', '^', '~', '[', ']', '`'};
1793         int len = escChs.length;
1794         char ch;
1795         for (int i = 0; i < len; i++) {
1796             ch = escChs[i];
1797             gNeedEscaping[ch] = true;
1798             gAfterEscaping1[ch] = gHexChs[ch >> 4];
1799             gAfterEscaping2[ch] = gHexChs[ch & 0xf];
1800         }
1801     }
1802 
    // To escape the "user.dir" system property, by using %HH to represent
    // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', '#', '%',
    // '"', '{', '}', '|', '\', '^', '~', '[', ']' and '`'.
    // It's a static method that reads and updates static cache fields, so it
    // needs to be synchronized.
    // This method looks heavy, but since the system property isn't expected
    // to change often, in most cases we only need to return the URI
    // that was escaped before.
    // According to the URI spec, non-ASCII characters (whose value >= 128)
    // need to be escaped too: from the first non-ASCII character onwards the
    // rest of the path is converted to UTF-8 bytes, and every byte with the
    // high bit set is written as %HH.
1813     private static synchronized URI getUserDir() throws URI.MalformedURIException {
1814         // get the user.dir property
1815         String userDir = "";
1816         try {
1817             userDir = SecuritySupport.getSystemProperty("user.dir");
1818         }
1819         catch (SecurityException se) {
1820         }
1821 
1822         // return empty string if property value is empty string.
1823         if (userDir.length() == 0)
1824             return new URI("file", "", "", null, null);
1825         // compute the new escaped value if the new property value doesn't
1826         // match the previous one
1827         if (gUserDirURI != null && userDir.equals(gUserDir)) {
1828             return gUserDirURI;
1829         }
1830 
1831         // record the new value as the global property value
1832         gUserDir = userDir;
1833 
1834         char separator = java.io.File.separatorChar;
1835         userDir = userDir.replace(separator, '/');
1836 
1837         int len = userDir.length(), ch;
1838         StringBuilder buffer = new StringBuilder(len*3);
1839         // change C:/blah to /C:/blah
1840         if (len >= 2 && userDir.charAt(1) == ':') {
1841             ch = Character.toUpperCase(userDir.charAt(0));
1842             if (ch >= 'A' && ch <= 'Z') {
1843                 buffer.append('/');
1844             }
1845         }
1846 
1847         // for each character in the path
1848         int i = 0;
1849         for (; i < len; i++) {
1850             ch = userDir.charAt(i);
1851             // if it's not an ASCII character, break here, and use UTF-8 encoding
1852             if (ch >= 128)
1853                 break;
1854             if (gNeedEscaping[ch]) {
1855                 buffer.append('%');
1856                 buffer.append(gAfterEscaping1[ch]);
1857                 buffer.append(gAfterEscaping2[ch]);
1858                 // record the fact that it's escaped
1859             }
1860             else {
1861                 buffer.append((char)ch);
1862             }
1863         }
1864 
1865         // we saw some non-ascii character
1866         if (i < len) {
1867             // get UTF-8 bytes for the remaining sub-string
1868             byte[] bytes = null;
1869             byte b;
1870             try {
1871                 bytes = userDir.substring(i).getBytes("UTF-8");
1872             } catch (java.io.UnsupportedEncodingException e) {
1873                 // should never happen
1874                 return new URI("file", "", userDir, null, null);
1875             }
1876             len = bytes.length;
1877 
1878             // for each byte
1879             for (i = 0; i < len; i++) {
1880                 b = bytes[i];
1881                 // for non-ascii character: make it positive, then escape
1882                 if (b < 0) {
1883                     ch = b + 256;
1884                     buffer.append('%');
1885                     buffer.append(gHexChs[ch >> 4]);
1886                     buffer.append(gHexChs[ch & 0xf]);
1887                 }
1888                 else if (gNeedEscaping[b]) {
1889                     buffer.append('%');
1890                     buffer.append(gAfterEscaping1[b]);
1891                     buffer.append(gAfterEscaping2[b]);
1892                 }
1893                 else {
1894                     buffer.append((char)b);
1895                 }
1896             }
1897         }
1898 
1899         // change blah/blah to blah/blah/
1900         if (!userDir.endsWith("/"))
1901             buffer.append('/');
1902 
1903         gUserDirURI = new URI("file", "", buffer.toString(), null, null);
1904 
1905         return gUserDirURI;
1906     }
1907 
1908     public static OutputStream createOutputStream(String uri) throws IOException {
1909         // URI was specified. Handle relative URIs.
1910         final String expanded = XMLEntityManager.expandSystemId(uri, null, true);
1911         final URL url = new URL(expanded != null ? expanded : uri);
1912         OutputStream out = null;
1913         String protocol = url.getProtocol();
1914         String host = url.getHost();
1915         // Use FileOutputStream if this URI is for a local file.
1916         if (protocol.equals("file")
1917                 && (host == null || host.length() == 0 || host.equals("localhost"))) {
1918             File file = new File(getPathWithoutEscapes(url.getPath()));
1919             if (!file.exists()) {
1920                 File parent = file.getParentFile();
1921                 if (parent != null && !parent.exists()) {
1922                     parent.mkdirs();
1923                 }
1924             }
1925             out = new FileOutputStream(file);
1926         }
1927         // Try to write to some other kind of URI. Some protocols
1928         // won't support this, though HTTP should work.
1929         else {
1930             URLConnection urlCon = url.openConnection();
1931             urlCon.setDoInput(false);
1932             urlCon.setDoOutput(true);
1933             urlCon.setUseCaches(false); // Enable tunneling.
1934             if (urlCon instanceof HttpURLConnection) {
1935                 // The DOM L3 REC says if we are writing to an HTTP URI
1936                 // it is to be done with an HTTP PUT.
1937                 HttpURLConnection httpCon = (HttpURLConnection) urlCon;
1938                 httpCon.setRequestMethod("PUT");
1939             }
1940             out = urlCon.getOutputStream();
1941         }
1942         return out;
1943     }
1944 
1945     private static String getPathWithoutEscapes(String origPath) {
1946         if (origPath != null && origPath.length() != 0 && origPath.indexOf('%') != -1) {
1947             // Locate the escape characters
1948             StringTokenizer tokenizer = new StringTokenizer(origPath, "%");
1949             StringBuilder result = new StringBuilder(origPath.length());
1950             int size = tokenizer.countTokens();
1951             result.append(tokenizer.nextToken());
1952             for(int i = 1; i < size; ++i) {
1953                 String token = tokenizer.nextToken();
1954                 // Decode the 2 digit hexadecimal number following % in '%nn'
1955                 result.append((char)Integer.valueOf(token.substring(0, 2), 16).intValue());
1956                 result.append(token.substring(2));
1957             }
1958             return result.toString();
1959         }
1960         return origPath;
1961     }
1962 
1963     /**
1964      * Absolutizes a URI using the current value
1965      * of the "user.dir" property as the base URI. If
1966      * the URI is already absolute, this is a no-op.
1967      *
1968      * @param uri the URI to absolutize
1969      */
1970     public static void absolutizeAgainstUserDir(URI uri)
1971         throws URI.MalformedURIException {
1972         uri.absolutize(getUserDir());
1973     }
1974 
    /**
     * Expands a system id against a base system id and returns the result
     * as a URI string. Failures to expand the id are not reported: the
     * given system id is returned unchanged instead.
     *
     * @param systemId The systemId to be expanded.
     * @param baseSystemId The system id to expand against. When it is null,
     *                     empty or equal to systemId, the current value of
     *                     the "user.dir" property is used as the base.
     *
     * @return Returns the URI string representing the expanded system
     *         identifier. The given system identifier itself is returned
     *         when it is null, empty, already a URI on its own, or when it
     *         cannot be expanded.
     *
     */
1988     public static String expandSystemId(String systemId, String baseSystemId) {
1989 
1990         // check for bad parameters id
1991         if (systemId == null || systemId.length() == 0) {
1992             return systemId;
1993         }
1994         // if id already expanded, return
1995         try {
1996             URI uri = new URI(systemId);
1997             if (uri != null) {
1998                 return systemId;
1999             }
2000         } catch (URI.MalformedURIException e) {
2001             // continue on...
2002         }
2003         // normalize id
2004         String id = fixURI(systemId);
2005 
2006         // normalize base
2007         URI base = null;
2008         URI uri = null;
2009         try {
2010             if (baseSystemId == null || baseSystemId.length() == 0 ||
2011                     baseSystemId.equals(systemId)) {
2012                 String dir = getUserDir().toString();
2013                 base = new URI("file", "", dir, null, null);
2014             } else {
2015                 try {
2016                     base = new URI(fixURI(baseSystemId));
2017                 } catch (URI.MalformedURIException e) {
2018                     if (baseSystemId.indexOf(':') != -1) {
2019                         // for xml schemas we might have baseURI with
2020                         // a specified drive
2021                         base = new URI("file", "", fixURI(baseSystemId), null, null);
2022                     } else {
2023                         String dir = getUserDir().toString();
2024                         dir = dir + fixURI(baseSystemId);
2025                         base = new URI("file", "", dir, null, null);
2026                     }
2027                 }
2028             }
2029             // expand id
2030             uri = new URI(base, id);
2031         } catch (Exception e) {
2032             // let it go through
2033 
2034         }
2035 
2036         if (uri == null) {
2037             return systemId;
2038         }
2039         return uri.toString();
2040 
2041     } // expandSystemId(String,String):String
2042 
2043     /**
2044      * Expands a system id and returns the system id as a URI, if
2045      * it can be expanded. A return value of null means that the
2046      * identifier is already expanded. An exception thrown
2047      * indicates a failure to expand the id.
2048      *
2049      * @param systemId The systemId to be expanded.
2050      *
2051      * @return Returns the URI string representing the expanded system
2052      *         identifier. A null value indicates that the given
2053      *         system identifier is already expanded.
2054      *
2055      */
2056     public static String expandSystemId(String systemId, String baseSystemId,
2057                                         boolean strict)
2058             throws URI.MalformedURIException {
2059 
2060         // check if there is a system id before
2061         // trying to expand it.
2062         if (systemId == null) {
2063             return null;
2064         }
2065 
2066         // system id has to be a valid URI
2067         if (strict) {
2068 
2069 
2070             // check if there is a system id before
2071             // trying to expand it.
2072             if (systemId == null) {
2073                 return null;
2074             }
2075 
2076             try {
2077                 // if it's already an absolute one, return it
2078                 new URI(systemId);
2079                 return systemId;
2080             }
2081             catch (URI.MalformedURIException ex) {
2082             }
2083             URI base = null;
2084             // if there isn't a base uri, use the working directory
2085             if (baseSystemId == null || baseSystemId.length() == 0) {
2086                 base = new URI("file", "", getUserDir().toString(), null, null);
2087             }
2088             // otherwise, use the base uri
2089             else {
2090                 try {
2091                     base = new URI(baseSystemId);
2092                 }
2093                 catch (URI.MalformedURIException e) {
2094                     // assume "base" is also a relative uri
2095                     String dir = getUserDir().toString();
2096                     dir = dir + baseSystemId;
2097                     base = new URI("file", "", dir, null, null);
2098                 }
2099             }
2100             // absolutize the system id using the base
2101             URI uri = new URI(base, systemId);
2102             // return the string rep of the new uri (an absolute one)
2103             return uri.toString();
2104 
2105             // if any exception is thrown, it'll get thrown to the caller.
2106         }
2107 
2108         // Assume the URIs are well-formed. If it turns out they're not, try fixing them up.
2109         try {
2110              return expandSystemIdStrictOff(systemId, baseSystemId);
2111         }
2112         catch (URI.MalformedURIException e) {
2113             /** Xerces URI rejects unicode, try java.net.URI
2114              * this is not ideal solution, but it covers known cases which either
2115              * Xerces URI or java.net.URI can handle alone
2116              * will file bug against java.net.URI
2117              */
2118             try {
2119                 return expandSystemIdStrictOff1(systemId, baseSystemId);
2120             } catch (URISyntaxException ex) {
2121                 // continue on...
2122             }
2123         }
2124         // check for bad parameters id
2125         if (systemId.length() == 0) {
2126             return systemId;
2127         }
2128 
2129         // normalize id
2130         String id = fixURI(systemId);
2131 
2132         // normalize base
2133         URI base = null;
2134         URI uri = null;
2135         try {
2136             if (baseSystemId == null || baseSystemId.length() == 0 ||
2137                 baseSystemId.equals(systemId)) {
2138                 base = getUserDir();
2139             }
2140             else {
2141                 try {
2142                     base = new URI(fixURI(baseSystemId).trim());
2143                 }
2144                 catch (URI.MalformedURIException e) {
2145                     if (baseSystemId.indexOf(':') != -1) {
2146                         // for xml schemas we might have baseURI with
2147                         // a specified drive
2148                         base = new URI("file", "", fixURI(baseSystemId).trim(), null, null);
2149                     }
2150                     else {
2151                         base = new URI(getUserDir(), fixURI(baseSystemId));
2152                     }
2153                 }
2154              }
2155              // expand id
2156              uri = new URI(base, id.trim());
2157         }
2158         catch (Exception e) {
2159             // let it go through
2160 
2161         }
2162 
2163         if (uri == null) {
2164             return systemId;
2165         }
2166         return uri.toString();
2167 
2168     } // expandSystemId(String,String,boolean):String
2169 
2170     /**
2171      * Helper method for expandSystemId(String,String,boolean):String
2172      */
2173     private static String expandSystemIdStrictOn(String systemId, String baseSystemId)
2174         throws URI.MalformedURIException {
2175 
2176         URI systemURI = new URI(systemId, true);
2177         // If it's already an absolute one, return it
2178         if (systemURI.isAbsoluteURI()) {
2179             return systemId;
2180         }
2181 
2182         // If there isn't a base URI, use the working directory
2183         URI baseURI = null;
2184         if (baseSystemId == null || baseSystemId.length() == 0) {
2185             baseURI = getUserDir();
2186         }
2187         else {
2188             baseURI = new URI(baseSystemId, true);
2189             if (!baseURI.isAbsoluteURI()) {
2190                 // assume "base" is also a relative uri
2191                 baseURI.absolutize(getUserDir());
2192             }
2193         }
2194 
2195         // absolutize the system identifier using the base URI
2196         systemURI.absolutize(baseURI);
2197 
2198         // return the string rep of the new uri (an absolute one)
2199         return systemURI.toString();
2200 
2201         // if any exception is thrown, it'll get thrown to the caller.
2202 
2203     } // expandSystemIdStrictOn(String,String):String
2204 
2205     /**
2206      * Helper method for expandSystemId(String,String,boolean):String
2207      */
2208     private static String expandSystemIdStrictOff(String systemId, String baseSystemId)
2209         throws URI.MalformedURIException {
2210 
2211         URI systemURI = new URI(systemId, true);
2212         // If it's already an absolute one, return it
2213         if (systemURI.isAbsoluteURI()) {
2214             if (systemURI.getScheme().length() > 1) {
2215                 return systemId;
2216             }
2217             /**
2218              * If the scheme's length is only one character,
2219              * it's likely that this was intended as a file
2220              * path. Fixing this up in expandSystemId to
2221              * maintain backwards compatibility.
2222              */
2223             throw new URI.MalformedURIException();
2224         }
2225 
2226         // If there isn't a base URI, use the working directory
2227         URI baseURI = null;
2228         if (baseSystemId == null || baseSystemId.length() == 0) {
2229             baseURI = getUserDir();
2230         }
2231         else {
2232             baseURI = new URI(baseSystemId, true);
2233             if (!baseURI.isAbsoluteURI()) {
2234                 // assume "base" is also a relative uri
2235                 baseURI.absolutize(getUserDir());
2236             }
2237         }
2238 
2239         // absolutize the system identifier using the base URI
2240         systemURI.absolutize(baseURI);
2241 
2242         // return the string rep of the new uri (an absolute one)
2243         return systemURI.toString();
2244 
2245         // if any exception is thrown, it'll get thrown to the caller.
2246 
2247     } // expandSystemIdStrictOff(String,String):String
2248 
2249     private static String expandSystemIdStrictOff1(String systemId, String baseSystemId)
2250         throws URISyntaxException, URI.MalformedURIException {
2251 
2252             java.net.URI systemURI = new java.net.URI(systemId);
2253         // If it's already an absolute one, return it
2254         if (systemURI.isAbsolute()) {
2255             if (systemURI.getScheme().length() > 1) {
2256                 return systemId;
2257             }
2258             /**
2259              * If the scheme's length is only one character,
2260              * it's likely that this was intended as a file
2261              * path. Fixing this up in expandSystemId to
2262              * maintain backwards compatibility.
2263              */
2264             throw new URISyntaxException(systemId, "the scheme's length is only one character");
2265         }
2266 
2267         // If there isn't a base URI, use the working directory
2268         URI baseURI = null;
2269         if (baseSystemId == null || baseSystemId.length() == 0) {
2270             baseURI = getUserDir();
2271         }
2272         else {
2273             baseURI = new URI(baseSystemId, true);
2274             if (!baseURI.isAbsoluteURI()) {
2275                 // assume "base" is also a relative uri
2276                 baseURI.absolutize(getUserDir());
2277             }
2278         }
2279 
2280         // absolutize the system identifier using the base URI
2281 //        systemURI.absolutize(baseURI);
2282         systemURI = (new java.net.URI(baseURI.toString())).resolve(systemURI);
2283 
2284         // return the string rep of the new uri (an absolute one)
2285         return systemURI.toString();
2286 
2287         // if any exception is thrown, it'll get thrown to the caller.
2288 
2289     } // expandSystemIdStrictOff(String,String):String
2290 
2291     //
2292     // Protected methods
2293     //
2294 
2295 
2296     /**
2297      * Returns the IANA encoding name that is auto-detected from
2298      * the bytes specified, with the endian-ness of that encoding where appropriate.
2299      *
2300      * @param b4    The first four bytes of the input.
2301      * @param count The number of bytes actually read.
2302      * @return a 2-element array:  the first element, an IANA-encoding string,
2303      *  the second element a Boolean which is true iff the document is big endian, false
2304      *  if it's little-endian, and null if the distinction isn't relevant.
2305      */
2306     protected Object[] getEncodingName(byte[] b4, int count) {
2307 
2308         if (count < 2) {
2309             return defaultEncoding;
2310         }
2311 
2312         // UTF-16, with BOM
2313         int b0 = b4[0] & 0xFF;
2314         int b1 = b4[1] & 0xFF;
2315         if (b0 == 0xFE && b1 == 0xFF) {
2316             // UTF-16, big-endian
2317             return new Object [] {"UTF-16BE", new Boolean(true)};
2318         }
2319         if (b0 == 0xFF && b1 == 0xFE) {
2320             // UTF-16, little-endian
2321             return new Object [] {"UTF-16LE", new Boolean(false)};
2322         }
2323 
2324         // default to UTF-8 if we don't have enough bytes to make a
2325         // good determination of the encoding
2326         if (count < 3) {
2327             return defaultEncoding;
2328         }
2329 
2330         // UTF-8 with a BOM
2331         int b2 = b4[2] & 0xFF;
2332         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
2333             return defaultEncoding;
2334         }
2335 
2336         // default to UTF-8 if we don't have enough bytes to make a
2337         // good determination of the encoding
2338         if (count < 4) {
2339             return defaultEncoding;
2340         }
2341 
2342         // other encodings
2343         int b3 = b4[3] & 0xFF;
2344         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
2345             // UCS-4, big endian (1234)
2346             return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
2347         }
2348         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
2349             // UCS-4, little endian (4321)
2350             return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
2351         }
2352         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
2353             // UCS-4, unusual octet order (2143)
2354             // REVISIT: What should this be?
2355             return new Object [] {"ISO-10646-UCS-4", null};
2356         }
2357         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
2358             // UCS-4, unusual octect order (3412)
2359             // REVISIT: What should this be?
2360             return new Object [] {"ISO-10646-UCS-4", null};
2361         }
2362         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
2363             // UTF-16, big-endian, no BOM
2364             // (or could turn out to be UCS-2...
2365             // REVISIT: What should this be?
2366             return new Object [] {"UTF-16BE", new Boolean(true)};
2367         }
2368         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
2369             // UTF-16, little-endian, no BOM
2370             // (or could turn out to be UCS-2...
2371             return new Object [] {"UTF-16LE", new Boolean(false)};
2372         }
2373         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
2374             // EBCDIC
2375             // a la xerces1, return CP037 instead of EBCDIC here
2376             return new Object [] {"CP037", null};
2377         }
2378 
2379         return defaultEncoding;
2380 
2381     } // getEncodingName(byte[],int):Object[]
2382 
    /**
     * Creates a reader capable of reading the given input stream in
     * the specified encoding.
     * <p>
     * Dedicated readers are used for UTF-8, US-ASCII, ISO-10646-UCS-4 and
     * ISO-10646-UCS-2 (the encoding name is compared case-insensitively).
     * Any other encoding is validated, mapped to a Java encoding name and
     * read through a buffered <code>InputStreamReader</code>.
     *
     * @param inputStream  The input stream.
     * @param encoding     The encoding name that the input stream is
     *                     encoded using. If the user has specified that
     *                     Java encoding names are allowed, then the
     *                     encoding name may be a Java encoding name;
     *                     otherwise, it is an IANA encoding name.
     *                     A null value is treated as "UTF-8".
     * @param isBigEndian   For encodings (like UCS-4), whose names cannot
     *                      specify a byte order, this tells whether the order
     *                      is big-endian.  null means unknown or not relevant.
     *
     * @return Returns a reader.
     *
     * @throws IOException Declared by this method; the
     *                     <code>InputStreamReader</code> constructor used for
     *                     the generic path can raise it for an unsupported
     *                     encoding name.
     */
    protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
    throws IOException {

        // normalize encoding name
        if (encoding == null) {
            encoding = "UTF-8";
        }

        // try to use an optimized reader
        // (ENCODING is the upper-cased name, used for all comparisons and
        // for the IANA-to-Java mapping lookup further down)
        String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
        if (ENCODING.equals("UTF-8")) {
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ creating UTF8Reader");
            }
            return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
        }
        if (ENCODING.equals("US-ASCII")) {
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ creating ASCIIReader");
            }
            return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
        }
        // UCS-4: the name carries no byte order, so isBigEndian selects the reader
        if(ENCODING.equals("ISO-10646-UCS-4")) {
            if(isBigEndian != null) {
                boolean isBE = isBigEndian.booleanValue();
                if(isBE) {
                    return new UCSReader(inputStream, UCSReader.UCS4BE);
                } else {
                    return new UCSReader(inputStream, UCSReader.UCS4LE);
                }
            } else {
                // unknown byte order: report a fatal error; if reportError
                // returns, execution falls through to the generic path below
                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                        "EncodingByteOrderUnsupported",
                        new Object[] { encoding },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }
        }
        // UCS-2: handled the same way as UCS-4 above
        if(ENCODING.equals("ISO-10646-UCS-2")) {
            if(isBigEndian != null) { // should never happen with this encoding...
                boolean isBE = isBigEndian.booleanValue();
                if(isBE) {
                    return new UCSReader(inputStream, UCSReader.UCS2BE);
                } else {
                    return new UCSReader(inputStream, UCSReader.UCS2LE);
                }
            } else {
                // unknown byte order: report a fatal error; if reportError
                // returns, execution falls through to the generic path below
                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                        "EncodingByteOrderUnsupported",
                        new Object[] { encoding },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }
        }

        // check for valid name
        boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
        boolean validJava = XMLChar.isValidJavaEncoding(encoding);
        if (!validIANA || (fAllowJavaEncodings && !validJava)) {
            fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                    "EncodingDeclInvalid",
                    new Object[] { encoding },
                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
                    // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
                    //       because every byte is a valid ISO Latin 1 character.
                    //       It may not translate correctly but if we failed on
                    //       the encoding anyway, then we're expecting the content
                    //       of the document to be bad. This will just prevent an
                    //       invalid UTF-8 sequence to be detected. This is only
                    //       important when continue-after-fatal-error is turned
                    //       on. -Ac
                    encoding = "ISO-8859-1";
        }

        // try to use a Java reader
        // NOTE: the lookup uses ENCODING, which was computed before the
        // ISO-8859-1 fallback assignment above; after a fallback the mapping
        // is therefore still looked up under the originally declared name.
        String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
        if (javaEncoding == null) {
            if(fAllowJavaEncodings) {
                // no mapping known: pass the name through as a Java encoding name
                javaEncoding = encoding;
            } else {
                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                        "EncodingDeclInvalid",
                        new Object[] { encoding },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
                        // see comment above.
                        javaEncoding = "ISO8859_1";
            }
        }
        if (DEBUG_ENCODINGS) {
            System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
            // reference comparison: true only when javaEncoding was assigned
            // directly from encoding in the pass-through branch above
            if (javaEncoding == encoding) {
                System.out.print(" (IANA encoding)");
            }
            System.out.println();
        }
        return new BufferedReader( new InputStreamReader(inputStream, javaEncoding));

    } // createReader(InputStream,String, Boolean): Reader
2495 
2496 
2497     /**
2498      * Return the public identifier for the current document event.
2499      * <p>
2500      * The return value is the public identifier of the document
2501      * entity or of the external parsed entity in which the markup
2502      * triggering the event appears.
2503      *
2504      * @return A string containing the public identifier, or
2505      *         null if none is available.
2506      */
2507     public String getPublicId() {
2508         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
2509     } // getPublicId():String
2510 
2511     /**
2512      * Return the expanded system identifier for the current document event.
2513      * <p>
2514      * The return value is the expanded system identifier of the document
2515      * entity or of the external parsed entity in which the markup
2516      * triggering the event appears.
2517      * <p>
2518      * If the system identifier is a URL, the parser must resolve it
2519      * fully before passing it to the application.
2520      *
2521      * @return A string containing the expanded system identifier, or null
2522      *         if none is available.
2523      */
2524     public String getExpandedSystemId() {
2525         if (fCurrentEntity != null) {
2526             if (fCurrentEntity.entityLocation != null &&
2527                     fCurrentEntity.entityLocation.getExpandedSystemId() != null ) {
2528                 return fCurrentEntity.entityLocation.getExpandedSystemId();
2529             } else {
2530                 // search for the first external entity on the stack
2531                 int size = fEntityStack.size();
2532                 for (int i = size - 1; i >= 0 ; i--) {
2533                     Entity.ScannedEntity externalEntity =
2534                             (Entity.ScannedEntity)fEntityStack.elementAt(i);
2535 
2536                     if (externalEntity.entityLocation != null &&
2537                             externalEntity.entityLocation.getExpandedSystemId() != null) {
2538                         return externalEntity.entityLocation.getExpandedSystemId();
2539                     }
2540                 }
2541             }
2542         }
2543         return null;
2544     } // getExpandedSystemId():String
2545 
2546     /**
2547      * Return the literal system identifier for the current document event.
2548      * <p>
2549      * The return value is the literal system identifier of the document
2550      * entity or of the external parsed entity in which the markup
2551      * triggering the event appears.
2552      * <p>
2553      * @return A string containing the literal system identifier, or null
2554      *         if none is available.
2555      */
2556     public String getLiteralSystemId() {
2557         if (fCurrentEntity != null) {
2558             if (fCurrentEntity.entityLocation != null &&
2559                     fCurrentEntity.entityLocation.getLiteralSystemId() != null ) {
2560                 return fCurrentEntity.entityLocation.getLiteralSystemId();
2561             } else {
2562                 // search for the first external entity on the stack
2563                 int size = fEntityStack.size();
2564                 for (int i = size - 1; i >= 0 ; i--) {
2565                     Entity.ScannedEntity externalEntity =
2566                             (Entity.ScannedEntity)fEntityStack.elementAt(i);
2567 
2568                     if (externalEntity.entityLocation != null &&
2569                             externalEntity.entityLocation.getLiteralSystemId() != null) {
2570                         return externalEntity.entityLocation.getLiteralSystemId();
2571                     }
2572                 }
2573             }
2574         }
2575         return null;
2576     } // getLiteralSystemId():String
2577 
2578     /**
2579      * Return the line number where the current document event ends.
2580      * <p>
2581      * <strong>Warning:</strong> The return value from the method
2582      * is intended only as an approximation for the sake of error
2583      * reporting; it is not intended to provide sufficient information
2584      * to edit the character content of the original XML document.
2585      * <p>
2586      * The return value is an approximation of the line number
2587      * in the document entity or external parsed entity where the
2588      * markup triggering the event appears.
2589      * <p>
2590      * If possible, the SAX driver should provide the line position
2591      * of the first character after the text associated with the document
2592      * event.  The first line in the document is line 1.
2593      *
2594      * @return The line number, or -1 if none is available.
2595      */
2596     public int getLineNumber() {
2597         if (fCurrentEntity != null) {
2598             if (fCurrentEntity.isExternal()) {
2599                 return fCurrentEntity.lineNumber;
2600             } else {
2601                 // search for the first external entity on the stack
2602                 int size = fEntityStack.size();
2603                 for (int i=size-1; i>0 ; i--) {
2604                     Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i);
2605                     if (firstExternalEntity.isExternal()) {
2606                         return firstExternalEntity.lineNumber;
2607                     }
2608                 }
2609             }
2610         }
2611 
2612         return -1;
2613 
2614     } // getLineNumber():int
2615 
2616     /**
2617      * Return the column number where the current document event ends.
2618      * <p>
2619      * <strong>Warning:</strong> The return value from the method
2620      * is intended only as an approximation for the sake of error
2621      * reporting; it is not intended to provide sufficient information
2622      * to edit the character content of the original XML document.
2623      * <p>
2624      * The return value is an approximation of the column number
2625      * in the document entity or external parsed entity where the
2626      * markup triggering the event appears.
2627      * <p>
2628      * If possible, the SAX driver should provide the line position
2629      * of the first character after the text associated with the document
2630      * event.
2631      * <p>
2632      * If possible, the SAX driver should provide the line position
2633      * of the first character after the text associated with the document
2634      * event.  The first column in each line is column 1.
2635      *
2636      * @return The column number, or -1 if none is available.
2637      */
2638     public int getColumnNumber() {
2639         if (fCurrentEntity != null) {
2640             if (fCurrentEntity.isExternal()) {
2641                 return fCurrentEntity.columnNumber;
2642             } else {
2643                 // search for the first external entity on the stack
2644                 int size = fEntityStack.size();
2645                 for (int i=size-1; i>0 ; i--) {
2646                     Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i);
2647                     if (firstExternalEntity.isExternal()) {
2648                         return firstExternalEntity.columnNumber;
2649                     }
2650                 }
2651             }
2652         }
2653 
2654         return -1;
2655     } // getColumnNumber():int
2656 
2657 
2658     //
2659     // Protected static methods
2660     //
2661 
2662     /**
2663      * Fixes a platform dependent filename to standard URI form.
2664      *
2665      * @param str The string to fix.
2666      *
2667      * @return Returns the fixed URI string.
2668      */
2669     protected static String fixURI(String str) {
2670 
2671         // handle platform dependent strings
2672         str = str.replace(java.io.File.separatorChar, '/');
2673 
2674         // Windows fix
2675         if (str.length() >= 2) {
2676             char ch1 = str.charAt(1);
2677             // change "C:blah" to "/C:blah"
2678             if (ch1 == ':') {
2679                 char ch0 = Character.toUpperCase(str.charAt(0));
2680                 if (ch0 >= 'A' && ch0 <= 'Z') {
2681                     str = "/" + str;
2682                 }
2683             }
2684             // change "//blah" to "file://blah"
2685             else if (ch1 == '/' && str.charAt(0) == '/') {
2686                 str = "file:" + str;
2687             }
2688         }
2689 
2690         // replace spaces in file names with %20.
2691         // Original comment from JDK5: the following algorithm might not be
2692         // very performant, but people who want to use invalid URI's have to
2693         // pay the price.
2694         int pos = str.indexOf(' ');
2695         if (pos >= 0) {
2696             StringBuilder sb = new StringBuilder(str.length());
2697             // put characters before ' ' into the string builder
2698             for (int i = 0; i < pos; i++)
2699                 sb.append(str.charAt(i));
2700             // and %20 for the space
2701             sb.append("%20");
2702             // for the remamining part, also convert ' ' to "%20".
2703             for (int i = pos+1; i < str.length(); i++) {
2704                 if (str.charAt(i) == ' ')
2705                     sb.append("%20");
2706                 else
2707                     sb.append(str.charAt(i));
2708             }
2709             str = sb.toString();
2710         }
2711 
2712         // done
2713         return str;
2714 
2715     } // fixURI(String):String
2716 
2717 
2718     //
2719     // Package visible methods
2720     //
2721     /** Prints the contents of the buffer. */
2722     final void print() {
2723         if (DEBUG_BUFFER) {
2724             if (fCurrentEntity != null) {
2725                 System.out.print('[');
2726                 System.out.print(fCurrentEntity.count);
2727                 System.out.print(' ');
2728                 System.out.print(fCurrentEntity.position);
2729                 if (fCurrentEntity.count > 0) {
2730                     System.out.print(" \"");
2731                     for (int i = 0; i < fCurrentEntity.count; i++) {
2732                         if (i == fCurrentEntity.position) {
2733                             System.out.print('^');
2734                         }
2735                         char c = fCurrentEntity.ch[i];
2736                         switch (c) {
2737                             case '\n': {
2738                                 System.out.print("\\n");
2739                                 break;
2740                             }
2741                             case '\r': {
2742                                 System.out.print("\\r");
2743                                 break;
2744                             }
2745                             case '\t': {
2746                                 System.out.print("\\t");
2747                                 break;
2748                             }
2749                             case '\\': {
2750                                 System.out.print("\\\\");
2751                                 break;
2752                             }
2753                             default: {
2754                                 System.out.print(c);
2755                             }
2756                         }
2757                     }
2758                     if (fCurrentEntity.position == fCurrentEntity.count) {
2759                         System.out.print('^');
2760                     }
2761                     System.out.print('"');
2762                 }
2763                 System.out.print(']');
2764                 System.out.print(" @ ");
2765                 System.out.print(fCurrentEntity.lineNumber);
2766                 System.out.print(',');
2767                 System.out.print(fCurrentEntity.columnNumber);
2768             } else {
2769                 System.out.print("*NO CURRENT ENTITY*");
2770             }
2771         }
2772     } // print()
2773 
2774     /**
2775      * Buffer used in entity manager to reuse character arrays instead
2776      * of creating new ones every time.
2777      *
2778      * @xerces.internal
2779      *
2780      * @author Ankit Pasricha, IBM
2781      */
2782     private static class CharacterBuffer {
2783 
2784         /** character buffer */
2785         private char[] ch;
2786 
2787         /** whether the buffer is for an external or internal scanned entity */
2788         private boolean isExternal;
2789 
2790         public CharacterBuffer(boolean isExternal, int size) {
2791             this.isExternal = isExternal;
2792             ch = new char[size];
2793         }
2794     }
2795 
2796 
2797      /**
2798      * Stores a number of character buffers and provides it to the entity
2799      * manager to use when an entity is seen.
2800      *
2801      * @xerces.internal
2802      *
2803      * @author Ankit Pasricha, IBM
2804      */
2805     private static class CharacterBufferPool {
2806 
2807         private static final int DEFAULT_POOL_SIZE = 3;
2808 
2809         private CharacterBuffer[] fInternalBufferPool;
2810         private CharacterBuffer[] fExternalBufferPool;
2811 
2812         private int fExternalBufferSize;
2813         private int fInternalBufferSize;
2814         private int poolSize;
2815 
2816         private int fInternalTop;
2817         private int fExternalTop;
2818 
2819         public CharacterBufferPool(int externalBufferSize, int internalBufferSize) {
2820             this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize);
2821         }
2822 
2823         public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) {
2824             fExternalBufferSize = externalBufferSize;
2825             fInternalBufferSize = internalBufferSize;
2826             this.poolSize = poolSize;
2827             init();
2828         }
2829 
2830         /** Initializes buffer pool. **/
2831         private void init() {
2832             fInternalBufferPool = new CharacterBuffer[poolSize];
2833             fExternalBufferPool = new CharacterBuffer[poolSize];
2834             fInternalTop = -1;
2835             fExternalTop = -1;
2836         }
2837 
2838         /** Retrieves buffer from pool. **/
2839         public CharacterBuffer getBuffer(boolean external) {
2840             if (external) {
2841                 if (fExternalTop > -1) {
2842                     return (CharacterBuffer)fExternalBufferPool[fExternalTop--];
2843                 }
2844                 else {
2845                     return new CharacterBuffer(true, fExternalBufferSize);
2846                 }
2847             }
2848             else {
2849                 if (fInternalTop > -1) {
2850                     return (CharacterBuffer)fInternalBufferPool[fInternalTop--];
2851                 }
2852                 else {
2853                     return new CharacterBuffer(false, fInternalBufferSize);
2854                 }
2855             }
2856         }
2857 
2858         /** Returns buffer to pool. **/
2859         public void returnToPool(CharacterBuffer buffer) {
2860             if (buffer.isExternal) {
2861                 if (fExternalTop < fExternalBufferPool.length - 1) {
2862                     fExternalBufferPool[++fExternalTop] = buffer;
2863                 }
2864             }
2865             else if (fInternalTop < fInternalBufferPool.length - 1) {
2866                 fInternalBufferPool[++fInternalTop] = buffer;
2867             }
2868         }
2869 
2870         /** Sets the size of external buffers and dumps the old pool. **/
2871         public void setExternalBufferSize(int bufferSize) {
2872             fExternalBufferSize = bufferSize;
2873             fExternalBufferPool = new CharacterBuffer[poolSize];
2874             fExternalTop = -1;
2875         }
2876     }
2877 
2878     /**
2879     * This class wraps the byte inputstreams we're presented with.
2880     * We need it because java.io.InputStreams don't provide
2881     * functionality to reread processed bytes, and they have a habit
2882     * of reading more than one character when you call their read()
2883     * methods.  This means that, once we discover the true (declared)
2884     * encoding of a document, we can neither backtrack to read the
2885     * whole doc again nor start reading where we are with a new
2886     * reader.
2887     *
    * This class allows rewinding an inputStream by allowing a mark
    * to be set, and the stream reset to that position.  <strong>The
    * class assumes that it needs to read one character per
    * invocation when its read() method is invoked, but uses the
    * underlying InputStream's read(byte[], offset, length) method--it
    * won't buffer data read this way!</strong>
2894     *
2895     * @xerces.internal
2896     *
2897     * @author Neil Graham, IBM
2898     * @author Glenn Marcy, IBM
2899     */
2900 
2901     protected final class RewindableInputStream extends InputStream {
2902 
2903         private InputStream fInputStream;
2904         private byte[] fData;
2905         private int fStartOffset;
2906         private int fEndOffset;
2907         private int fOffset;
2908         private int fLength;
2909         private int fMark;
2910 
2911         public RewindableInputStream(InputStream is) {
2912             fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
2913             fInputStream = is;
2914             fStartOffset = 0;
2915             fEndOffset = -1;
2916             fOffset = 0;
2917             fLength = 0;
2918             fMark = 0;
2919         }
2920 
2921         public void setStartOffset(int offset) {
2922             fStartOffset = offset;
2923         }
2924 
2925         public void rewind() {
2926             fOffset = fStartOffset;
2927         }
2928 
2929         public int read() throws IOException {
2930             int b = 0;
2931             if (fOffset < fLength) {
2932                 return fData[fOffset++] & 0xff;
2933             }
2934             if (fOffset == fEndOffset) {
2935                 return -1;
2936             }
2937             if (fOffset == fData.length) {
2938                 byte[] newData = new byte[fOffset << 1];
2939                 System.arraycopy(fData, 0, newData, 0, fOffset);
2940                 fData = newData;
2941             }
2942             b = fInputStream.read();
2943             if (b == -1) {
2944                 fEndOffset = fOffset;
2945                 return -1;
2946             }
2947             fData[fLength++] = (byte)b;
2948             fOffset++;
2949             return b & 0xff;
2950         }
2951 
2952         public int read(byte[] b, int off, int len) throws IOException {
2953             int bytesLeft = fLength - fOffset;
2954             if (bytesLeft == 0) {
2955                 if (fOffset == fEndOffset) {
2956                     return -1;
2957                 }
2958 
2959                 /**
2960                  * //System.out.println("fCurrentEntitty = " + fCurrentEntity );
2961                  * //System.out.println("fInputStream = " + fInputStream );
2962                  * // better get some more for the voracious reader... */
2963 
2964                 if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) {
2965 
2966                     if (!fCurrentEntity.xmlDeclChunkRead)
2967                     {
2968                         fCurrentEntity.xmlDeclChunkRead = true;
2969                         len = fCurrentEntity.DEFAULT_XMLDECL_BUFFER_SIZE;
2970                     }
2971                     return fInputStream.read(b, off, len);
2972                 }
2973 
2974                 int returnedVal = read();
2975                 if(returnedVal == -1) {
2976                   fEndOffset = fOffset;
2977                   return -1;
2978                 }
2979                 b[off] = (byte)returnedVal;
2980                 return 1;
2981 
2982             }
2983             if (len < bytesLeft) {
2984                 if (len <= 0) {
2985                     return 0;
2986                 }
2987             } else {
2988                 len = bytesLeft;
2989             }
2990             if (b != null) {
2991                 System.arraycopy(fData, fOffset, b, off, len);
2992             }
2993             fOffset += len;
2994             return len;
2995         }
2996 
2997         public long skip(long n)
2998         throws IOException {
2999             int bytesLeft;
3000             if (n <= 0) {
3001                 return 0;
3002             }
3003             bytesLeft = fLength - fOffset;
3004             if (bytesLeft == 0) {
3005                 if (fOffset == fEndOffset) {
3006                     return 0;
3007                 }
3008                 return fInputStream.skip(n);
3009             }
3010             if (n <= bytesLeft) {
3011                 fOffset += n;
3012                 return n;
3013             }
3014             fOffset += bytesLeft;
3015             if (fOffset == fEndOffset) {
3016                 return bytesLeft;
3017             }
3018             n -= bytesLeft;
3019             /*
3020             * In a manner of speaking, when this class isn't permitting more
3021             * than one byte at a time to be read, it is "blocking".  The
3022             * available() method should indicate how much can be read without
3023             * blocking, so while we're in this mode, it should only indicate
3024             * that bytes in its buffer are available; otherwise, the result of
3025             * available() on the underlying InputStream is appropriate.
3026             */
3027             return fInputStream.skip(n) + bytesLeft;
3028         }
3029 
3030         public int available() throws IOException {
3031             int bytesLeft = fLength - fOffset;
3032             if (bytesLeft == 0) {
3033                 if (fOffset == fEndOffset) {
3034                     return -1;
3035                 }
3036                 return fCurrentEntity.mayReadChunks ? fInputStream.available()
3037                 : 0;
3038             }
3039             return bytesLeft;
3040         }
3041 
3042         public void mark(int howMuch) {
3043             fMark = fOffset;
3044         }
3045 
3046         public void reset() {
3047             fOffset = fMark;
3048             //test();
3049         }
3050 
3051         public boolean markSupported() {
3052             return true;
3053         }
3054 
3055         public void close() throws IOException {
3056             if (fInputStream != null) {
3057                 fInputStream.close();
3058                 fInputStream = null;
3059             }
3060         }
3061     } // end of RewindableInputStream class
3062 
3063     public void test(){
3064         //System.out.println("TESTING: Added familytree to entityManager");
3065         //Usecase1
3066         fEntityStorage.addExternalEntity("entityUsecase1",null,
3067                 "/space/home/stax/sun/6thJan2004/zephyr/data/test.txt",
3068                 "/space/home/stax/sun/6thJan2004/zephyr/data/entity.xml");
3069 
3070         //Usecase2
3071         fEntityStorage.addInternalEntity("entityUsecase2","<Test>value</Test>");
3072         fEntityStorage.addInternalEntity("entityUsecase3","value3");
3073         fEntityStorage.addInternalEntity("text", "Hello World.");
3074         fEntityStorage.addInternalEntity("empty-element", "<foo/>");
3075         fEntityStorage.addInternalEntity("balanced-element", "<foo></foo>");
3076         fEntityStorage.addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>");
3077         fEntityStorage.addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>");
3078         fEntityStorage.addInternalEntity("unbalanced-entity", "<foo>");
3079         fEntityStorage.addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>");
3080         fEntityStorage.addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>");
3081         fEntityStorage.addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>");
3082         fEntityStorage.addInternalEntity("ch","©");
3083         fEntityStorage.addInternalEntity("ch1","T");
3084         fEntityStorage.addInternalEntity("% ch2","param");
3085     }
3086 
3087 } // class XMLEntityManager