1 /*
   2  * Copyright (c) 2004, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package org.xml.sax.ext;
  27 
  28 import java.io.IOException;
  29 
  30 import org.xml.sax.EntityResolver;
  31 import org.xml.sax.InputSource;
  32 import org.xml.sax.XMLReader;
  33 import org.xml.sax.SAXException;
  34 
  35 
  36 /**
  37  * Extended interface for mapping external entity references to input
  38  * sources, or providing a missing external subset.  The
  39  * {@link XMLReader#setEntityResolver XMLReader.setEntityResolver()} method
  40  * is used to provide implementations of this interface to parsers.
  41  * When a parser uses the methods in this interface, the
  42  * {@link EntityResolver2#resolveEntity EntityResolver2.resolveEntity()}
  43  * method (in this interface) is used <em>instead of</em> the older (SAX 1.0)
  44  * {@link EntityResolver#resolveEntity EntityResolver.resolveEntity()} method.
  45  *
  46  * <p>If a SAX application requires the customized handling which this
  47  * interface defines for external entities, it must ensure that it uses
  48  * an XMLReader with the
  49  * <em>http://xml.org/sax/features/use-entity-resolver2</em> feature flag
  50  * set to <em>true</em> (which is its default value when the feature is
  51  * recognized).  If that flag is unrecognized, or its value is false,
  52  * or the resolver does not implement this interface, then only the
  53  * {@link EntityResolver} method will be used.
  54  *
  55  * <p>That supports three categories of application that modify entity
  56  * resolution.  <em>Old Style</em> applications won't know about this interface;
  57  * they will provide an EntityResolver.
 * <em>Transitional Mode</em> applications provide an EntityResolver2 and
 * automatically get the benefit of its methods in any systems (parsers or
 * other tools) supporting it, due to polymorphism.
  61  * Both <em>Old Style</em> and <em>Transitional Mode</em> applications will
  62  * work with any SAX2 parser.
  63  * <em>New style</em> applications will fail to run except on SAX2 parsers
  64  * that support this particular feature.
 * They will insist that the feature flag have a value of "true", and the
 * EntityResolver2 implementation they provide might throw an exception
 * if the original SAX 1.0 style entity resolution method is invoked.
  68  *
  69  * @see org.xml.sax.XMLReader#setEntityResolver
  70  *
  71  * @since 1.5, SAX 2.0 (extensions 1.1 alpha)
  72  * @author David Brownell
  73  */
  74 public interface EntityResolver2 extends EntityResolver
  75 {
  76     /**
  77      * Allows applications to provide an external subset for documents
  78      * that don't explicitly define one.  Documents with DOCTYPE declarations
  79      * that omit an external subset can thus augment the declarations
  80      * available for validation, entity processing, and attribute processing
  81      * (normalization, defaulting, and reporting types including ID).
  82      * This augmentation is reported
  83      * through the {@link LexicalHandler#startDTD startDTD()} method as if
  84      * the document text had originally included the external subset;
  85      * this callback is made before any internal subset data or errors
  86      * are reported.
  87      *
  88      * <p>This method can also be used with documents that have no DOCTYPE
  89      * declaration.  When the root element is encountered,
  90      * but no DOCTYPE declaration has been seen, this method is
  91      * invoked.  If it returns a value for the external subset, that root
  92      * element is declared to be the root element, giving the effect of
  93      * splicing a DOCTYPE declaration at the end the prolog of a document
  94      * that could not otherwise be valid.  The sequence of parser callbacks
  95      * in that case logically resembles this:
  96      *
  97      * <pre>
  98      * ... comments and PIs from the prolog (as usual)
  99      * startDTD ("rootName", source.getPublicId (), source.getSystemId ());
 100      * startEntity ("[dtd]");
 101      * ... declarations, comments, and PIs from the external subset
 102      * endEntity ("[dtd]");
 103      * endDTD ();
 104      * ... then the rest of the document (as usual)
 105      * startElement (..., "rootName", ...);
 106      * </pre>
 107      *
 108      * <p>Note that the InputSource gets no further resolution.
 109      * Implementations of this method may wish to invoke
 110      * {@link #resolveEntity resolveEntity()} to gain benefits such as use
 111      * of local caches of DTD entities.  Also, this method will never be
 112      * used by a (non-validating) processor that is not including external
 113      * parameter entities.
 114      *
 115      * <p>Uses for this method include facilitating data validation when
 116      * interoperating with XML processors that would always require
 117      * undesirable network accesses for external entities, or which for
 118      * other reasons adopt a "no DTDs" policy.
 119      * Non-validation motives include forcing documents to include DTDs so
 120      * that attributes are handled consistently.
 121      * For example, an XPath processor needs to know which attibutes have
 122      * type "ID" before it can process a widely used type of reference.
 123      *
 124      * <p><strong>Warning:</strong> Returning an external subset modifies
 125      * the input document.  By providing definitions for general entities,
 126      * it can make a malformed document appear to be well formed.
 127      *
 128      * @param name Identifies the document root element.  This name comes
 129      *  from a DOCTYPE declaration (where available) or from the actual
 130      *  root element.
 131      * @param baseURI The document's base URI, serving as an additional
 132      *  hint for selecting the external subset.  This is always an absolute
 133      *  URI, unless it is null because the XMLReader was given an InputSource
 134      *  without one.
 135      *
 136      * @return An InputSource object describing the new external subset
 137      *  to be used by the parser, or null to indicate that no external
 138      *  subset is provided.
 139      *
 140      * @exception SAXException Any SAX exception, possibly wrapping
 141      *  another exception.
 142      * @exception IOException Probably indicating a failure to create
 143      *  a new InputStream or Reader, or an illegal URL.
 144      */
 145     public InputSource getExternalSubset (String name, String baseURI)
 146     throws SAXException, IOException;
 147 
 148     /**
 149      * Allows applications to map references to external entities into input
 150      * sources, or tell the parser it should use conventional URI resolution.
 151      * This method is only called for external entities which have been
 152      * properly declared.
 153      * This method provides more flexibility than the {@link EntityResolver}
 154      * interface, supporting implementations of more complex catalogue
 155      * schemes such as the one defined by the <a href=
 156      *   "http://www.oasis-open.org/committees/entity/spec-2001-08-06.html"
 157      *   >OASIS XML Catalogs</a> specification.
 158      *
 159      * <p>Parsers configured to use this resolver method will call it
 160      * to determine the input source to use for any external entity
 161      * being included because of a reference in the XML text.
 162      * That excludes the document entity, and any external entity returned
 163      * by {@link #getExternalSubset getExternalSubset()}.
 164      * When a (non-validating) processor is configured not to include
 165      * a class of entities (parameter or general) through use of feature
 166      * flags, this method is not invoked for such entities.
 167      *
 168      * <p>Note that the entity naming scheme used here is the same one
 169      * used in the {@link LexicalHandler}, or in the {@link
 170      *   org.xml.sax.ContentHandler#skippedEntity
 171      *   ContentHandler.skippedEntity()}
 172      * method.
 173      *
 174      * @param name Identifies the external entity being resolved.
 175      *  Either "[dtd]" for the external subset, or a name starting
 176      *  with "%" to indicate a parameter entity, or else the name of
 177      *  a general entity.  This is never null when invoked by a SAX2
 178      *  parser.
 179      * @param publicId The public identifier of the external entity being
 180      *  referenced (normalized as required by the XML specification), or
 181      *  null if none was supplied.
 182      * @param baseURI The URI with respect to which relative systemIDs
 183      *  are interpreted.  This is always an absolute URI, unless it is
 184      *  null (likely because the XMLReader was given an InputSource without
 185      *  one).  This URI is defined by the XML specification to be the one
 186      *  associated with the "{@literal <}" starting the relevant declaration.
 187      * @param systemId The system identifier of the external entity
 188      *  being referenced; either a relative or absolute URI.
 189      *  This is never null when invoked by a SAX2 parser; only declared
 190      *  entities, and any external subset, are resolved by such parsers.
 191      *
 192      * @return An InputSource object describing the new input source to
 193      *  be used by the parser.  Returning null directs the parser to
 194      *  resolve the system ID against the base URI and open a connection
 195      *  to resulting URI.
 196      *
 197      * @exception SAXException Any SAX exception, possibly wrapping
 198      *  another exception.
 199      * @exception IOException Probably indicating a failure to create
 200      *  a new InputStream or Reader, or an illegal URL.
 201      */
 202     public InputSource resolveEntity (
 203             String name,
 204             String publicId,
 205             String baseURI,
 206             String systemId
 207     ) throws SAXException, IOException;
 208 }