1 /*
   2  * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package org.xml.sax;
  27 
  28 import java.io.IOException;
  29 import java.io.Reader;
  30 import java.io.InputStream;
  31 
  32 /**
  33  * A single input source for an XML entity.
  34  *
  35  * <p>This class allows a SAX application to encapsulate information
  36  * about an input source in a single object, which may include
  37  * a public identifier, a system identifier, a byte stream (possibly
  38  * with a specified encoding), and/or a character stream.</p>
  39  *
  40  * <p>There are two places that the application can deliver an
  41  * input source to the parser: as the argument to the Parser.parse
  42  * method, or as the return value of the EntityResolver.resolveEntity
  43  * method.</p>
  44  *
  45  * <p>The SAX parser will use the InputSource object to determine how
  46  * to read XML input.  If there is a character stream available, the
  47  * parser will read that stream directly, disregarding any text
  48  * encoding declaration found in that stream.
  49  * If there is no character stream, but there is
  50  * a byte stream, the parser will use that byte stream, using the
  51  * encoding specified in the InputSource or else (if no encoding is
  52  * specified) autodetecting the character encoding using an algorithm
  53  * such as the one in the XML specification.  If neither a character
  54  * stream nor a
  55  * byte stream is available, the parser will attempt to open a URI
  56  * connection to the resource identified by the system
  57  * identifier.</p>
  58  *
  59  * <p>An InputSource object belongs to the application: the SAX parser
  60  * shall never modify it in any way (it may modify a copy if
  61  * necessary).  However, standard processing of both byte and
 * character streams is to close them as part of end-of-parse cleanup,
  63  * so applications should not attempt to re-use such streams after they
  64  * have been handed to a parser.  </p>
  65  *
  66  * @since 1.4, SAX 1.0
  67  * @author David Megginson
  68  * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
  69  * @see org.xml.sax.EntityResolver#resolveEntity
  70  * @see java.io.InputStream
  71  * @see java.io.Reader
  72  */
  73 public class InputSource {
  74 
  75     /**
  76      * Zero-argument default constructor.
  77      *
  78      * @see #setPublicId
  79      * @see #setSystemId
  80      * @see #setByteStream
  81      * @see #setCharacterStream
  82      * @see #setEncoding
  83      */
  84     public InputSource ()
  85     {
  86     }
  87 
  88 
  89     /**
  90      * Create a new input source with a system identifier.
  91      *
  92      * <p>Applications may use setPublicId to include a
  93      * public identifier as well, or setEncoding to specify
  94      * the character encoding, if known.</p>
  95      *
  96      * <p>If the system identifier is a URL, it must be fully
  97      * resolved (it may not be a relative URL).</p>
  98      *
  99      * @param systemId The system identifier (URI).
 100      * @see #setPublicId
 101      * @see #setSystemId
 102      * @see #setByteStream
 103      * @see #setEncoding
 104      * @see #setCharacterStream
 105      */
 106     public InputSource (String systemId)
 107     {
 108         setSystemId(systemId);
 109     }
 110 
 111 
 112     /**
 113      * Create a new input source with a byte stream.
 114      *
 115      * <p>Application writers should use setSystemId() to provide a base
 116      * for resolving relative URIs, may use setPublicId to include a
 117      * public identifier, and may use setEncoding to specify the object's
 118      * character encoding.</p>
 119      *
 120      * @param byteStream The raw byte stream containing the document.
 121      * @see #setPublicId
 122      * @see #setSystemId
 123      * @see #setEncoding
 124      * @see #setByteStream
 125      * @see #setCharacterStream
 126      */
 127     public InputSource (InputStream byteStream)
 128     {
 129         setByteStream(byteStream);
 130     }
 131 
 132 
 133     /**
 134      * Create a new input source with a character stream.
 135      *
 136      * <p>Application writers should use setSystemId() to provide a base
 137      * for resolving relative URIs, and may use setPublicId to include a
 138      * public identifier.</p>
 139      *
 140      * <p>The character stream shall not include a byte order mark.</p>
 141      *
 142      * @param characterStream the character stream
 143      * @see #setPublicId
 144      * @see #setSystemId
 145      * @see #setByteStream
 146      * @see #setCharacterStream
 147      */
 148     public InputSource (Reader characterStream)
 149     {
 150         setCharacterStream(characterStream);
 151     }
 152 
 153 
 154     /**
 155      * Set the public identifier for this input source.
 156      *
 157      * <p>The public identifier is always optional: if the application
 158      * writer includes one, it will be provided as part of the
 159      * location information.</p>
 160      *
 161      * @param publicId The public identifier as a string.
 162      * @see #getPublicId
 163      * @see org.xml.sax.Locator#getPublicId
 164      * @see org.xml.sax.SAXParseException#getPublicId
 165      */
 166     public void setPublicId (String publicId)
 167     {
 168         this.publicId = publicId;
 169     }
 170 
 171 
 172     /**
 173      * Get the public identifier for this input source.
 174      *
 175      * @return The public identifier, or null if none was supplied.
 176      * @see #setPublicId
 177      */
 178     public String getPublicId ()
 179     {
 180         return publicId;
 181     }
 182 
 183 
 184     /**
 185      * Set the system identifier for this input source.
 186      *
 187      * <p>The system identifier is optional if there is a byte stream
 188      * or a character stream, but it is still useful to provide one,
 189      * since the application can use it to resolve relative URIs
 190      * and can include it in error messages and warnings (the parser
 191      * will attempt to open a connection to the URI only if
 192      * there is no byte stream or character stream specified).</p>
 193      *
 194      * <p>If the application knows the character encoding of the
 195      * object pointed to by the system identifier, it can register
 196      * the encoding using the setEncoding method.</p>
 197      *
 198      * <p>If the system identifier is a URL, it must be fully
 199      * resolved (it may not be a relative URL).</p>
 200      *
 201      * @param systemId The system identifier as a string.
 202      * @see #setEncoding
 203      * @see #getSystemId
 204      * @see org.xml.sax.Locator#getSystemId
 205      * @see org.xml.sax.SAXParseException#getSystemId
 206      */
 207     public void setSystemId (String systemId)
 208     {
 209         this.systemId = systemId;
 210     }
 211 
 212 
 213     /**
 214      * Get the system identifier for this input source.
 215      *
 216      * <p>The getEncoding method will return the character encoding
 217      * of the object pointed to, or null if unknown.</p>
 218      *
 219      * <p>If the system ID is a URL, it will be fully resolved.</p>
 220      *
 221      * @return The system identifier, or null if none was supplied.
 222      * @see #setSystemId
 223      * @see #getEncoding
 224      */
 225     public String getSystemId ()
 226     {
 227         return systemId;
 228     }
 229 
 230 
 231     /**
 232      * Set the byte stream for this input source.
 233      *
 234      * <p>The SAX parser will ignore this if there is also a character
 235      * stream specified, but it will use a byte stream in preference
 236      * to opening a URI connection itself.</p>
 237      *
 238      * <p>If the application knows the character encoding of the
 239      * byte stream, it should set it with the setEncoding method.</p>
 240      *
 241      * @param byteStream A byte stream containing an XML document or
 242      *        other entity.
 243      * @see #setEncoding
 244      * @see #getByteStream
 245      * @see #getEncoding
 246      * @see java.io.InputStream
 247      */
 248     public void setByteStream (InputStream byteStream)
 249     {
 250         this.byteStream = byteStream;
 251     }
 252 
 253 
 254     /**
 255      * Get the byte stream for this input source.
 256      *
 257      * <p>The getEncoding method will return the character
 258      * encoding for this byte stream, or null if unknown.</p>
 259      *
 260      * @return The byte stream, or null if none was supplied.
 261      * @see #getEncoding
 262      * @see #setByteStream
 263      */
 264     public InputStream getByteStream ()
 265     {
 266         return byteStream;
 267     }
 268 
 269 
 270     /**
 271      * Set the character encoding, if known.
 272      *
 273      * <p>The encoding must be a string acceptable for an
 274      * XML encoding declaration (see section 4.3.3 of the XML 1.0
 275      * recommendation).</p>
 276      *
 277      * <p>This method has no effect when the application provides a
 278      * character stream.</p>
 279      *
 280      * @param encoding A string describing the character encoding.
 281      * @see #setSystemId
 282      * @see #setByteStream
 283      * @see #getEncoding
 284      */
 285     public void setEncoding (String encoding)
 286     {
 287         this.encoding = encoding;
 288     }
 289 
 290 
 291     /**
 292      * Get the character encoding for a byte stream or URI.
 293      * This value will be ignored when the application provides a
 294      * character stream.
 295      *
 296      * @return The encoding, or null if none was supplied.
 297      * @see #setByteStream
 298      * @see #getSystemId
 299      * @see #getByteStream
 300      */
 301     public String getEncoding ()
 302     {
 303         return encoding;
 304     }
 305 
 306 
 307     /**
 308      * Set the character stream for this input source.
 309      *
 310      * <p>If there is a character stream specified, the SAX parser
 311      * will ignore any byte stream and will not attempt to open
 312      * a URI connection to the system identifier.</p>
 313      *
 314      * @param characterStream The character stream containing the
 315      *        XML document or other entity.
 316      * @see #getCharacterStream
 317      * @see java.io.Reader
 318      */
 319     public void setCharacterStream (Reader characterStream)
 320     {
 321         this.characterStream = characterStream;
 322     }
 323 
 324 
 325     /**
 326      * Get the character stream for this input source.
 327      *
 328      * @return The character stream, or null if none was supplied.
 329      * @see #setCharacterStream
 330      */
 331     public Reader getCharacterStream ()
 332     {
 333         return characterStream;
 334     }
 335 
 336     /**
 337      * Indicates whether the {@code InputSource} object is empty. Empty is
 338      * defined as follows:
 339      * <ul>
 340      * <li>All of the input sources, including the public identifier, system
 341      * identifier, byte stream, and character stream, are {@code null}.
 342      * </li>
 343      * <li>The public identifier and system identifier are  {@code null}, and
 344      * byte and character stream are either  {@code null} or contain no byte
 345      * or character.
 346      * <p>
 347      * Note that this method will reset the byte stream if it is provided, or
 348      * the character stream if the byte stream is not provided.
 349      * </li>
 350      * </ul>
 351      * <p>
 352      * In case of error while checking the byte or character stream, the method
 353      * will return false to allow the XML processor to handle the error.
 354      *
 355      * @return true if the {@code InputSource} object is empty, false otherwise
 356      */
 357     public boolean isEmpty() {
 358         return (publicId == null && systemId == null && isStreamEmpty());
 359     }
 360 
 361     private boolean isStreamEmpty() {
 362         boolean empty = true;
 363         try {
 364             if (byteStream != null) {
 365                 byteStream.reset();
 366                 int bytesRead = byteStream.available();
 367                 if (bytesRead > 0) {
 368                     return false;
 369                 }
 370             }
 371 
 372             if (characterStream != null) {
 373                 characterStream.reset();
 374                 int c = characterStream.read();
 375                 characterStream.reset();
 376                 if (c != -1) {
 377                     return false;
 378                 }
 379             }
 380         } catch (IOException ex) {
 381             //in case of error, return false
 382             return false;
 383         }
 384 
 385         return empty;
 386     }
 387     ////////////////////////////////////////////////////////////////////
 388     // Internal state.
 389     ////////////////////////////////////////////////////////////////////
 390 
 391     private String publicId;
 392     private String systemId;
 393     private InputStream byteStream;
 394     private String encoding;
 395     private Reader characterStream;
 396 
 397 }
 398 
 399 // end of InputSource.java