1 /*
   2  * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package org.xml.sax;
  27 
  28 import java.io.IOException;
  29 import java.io.Reader;
  30 import java.io.InputStream;
  31 
  32 /**
  33  * A single input source for an XML entity.
  34  *
  35  * <p>This class allows a SAX application to encapsulate information
  36  * about an input source in a single object, which may include
  37  * a public identifier, a system identifier, a byte stream (possibly
  38  * with a specified encoding), and/or a character stream.</p>
  39  *
  40  * <p>There are two places that the application can deliver an
  41  * input source to the parser: as the argument to the Parser.parse
  42  * method, or as the return value of the EntityResolver.resolveEntity
  43  * method.</p>
  44  *
  45  * <p>The SAX parser will use the InputSource object to determine how
  46  * to read XML input.  If there is a character stream available, the
  47  * parser will read that stream directly, disregarding any text
  48  * encoding declaration found in that stream.
  49  * If there is no character stream, but there is
  50  * a byte stream, the parser will use that byte stream, using the
  51  * encoding specified in the InputSource or else (if no encoding is
  52  * specified) autodetecting the character encoding using an algorithm
  53  * such as the one in the XML specification.  If neither a character
  54  * stream nor a
  55  * byte stream is available, the parser will attempt to open a URI
  56  * connection to the resource identified by the system
  57  * identifier.</p>
  58  *
  59  * <p>An InputSource object belongs to the application: the SAX parser
  60  * shall never modify it in any way (it may modify a copy if
  61  * necessary).  However, standard processing of both byte and
 * character streams is to close them as part of end-of-parse cleanup,
  63  * so applications should not attempt to re-use such streams after they
  64  * have been handed to a parser.  </p>
  65  *
  66  * @since 1.4, SAX 1.0
  67  * @author David Megginson
  68  * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
  69  * @see org.xml.sax.EntityResolver#resolveEntity
  70  * @see java.io.InputStream
  71  * @see java.io.Reader
  72  */
  73 public class InputSource {
  74 
  75     /**
  76      * Zero-argument default constructor.
  77      *
  78      * @see #setPublicId
  79      * @see #setSystemId
  80      * @see #setByteStream
  81      * @see #setCharacterStream
  82      * @see #setEncoding
  83      */
  84     public InputSource ()
  85     {
  86     }
  87 
  88 
  89     /**
  90      * Create a new input source with a system identifier.
  91      *
  92      * <p>Applications may use setPublicId to include a
  93      * public identifier as well, or setEncoding to specify
  94      * the character encoding, if known.</p>
  95      *
  96      * <p>If the system identifier is a URL, it must be fully
  97      * resolved (it may not be a relative URL).</p>
  98      *
  99      * @param systemId The system identifier (URI).
 100      * @see #setPublicId
 101      * @see #setSystemId
 102      * @see #setByteStream
 103      * @see #setEncoding
 104      * @see #setCharacterStream
 105      */
 106     public InputSource (String systemId)
 107     {
 108         setSystemId(systemId);
 109     }
 110 
 111 
 112     /**
 113      * Create a new input source with a byte stream.
 114      *
 115      * <p>Application writers should use setSystemId() to provide a base
 116      * for resolving relative URIs, may use setPublicId to include a
 117      * public identifier, and may use setEncoding to specify the object's
 118      * character encoding.</p>
 119      *
 120      * @param byteStream The raw byte stream containing the document.
 121      * @see #setPublicId
 122      * @see #setSystemId
 123      * @see #setEncoding
 124      * @see #setByteStream
 125      * @see #setCharacterStream
 126      */
 127     public InputSource (InputStream byteStream)
 128     {
 129         setByteStream(byteStream);
 130     }
 131 
 132 
 133     /**
 134      * Create a new input source with a character stream.
 135      *
 136      * <p>Application writers should use setSystemId() to provide a base
 137      * for resolving relative URIs, and may use setPublicId to include a
 138      * public identifier.</p>
 139      *
 140      * <p>The character stream shall not include a byte order mark.</p>
 141      *
 142      * @see #setPublicId
 143      * @see #setSystemId
 144      * @see #setByteStream
 145      * @see #setCharacterStream
 146      */
 147     public InputSource (Reader characterStream)
 148     {
 149         setCharacterStream(characterStream);
 150     }
 151 
 152 
 153     /**
 154      * Set the public identifier for this input source.
 155      *
 156      * <p>The public identifier is always optional: if the application
 157      * writer includes one, it will be provided as part of the
 158      * location information.</p>
 159      *
 160      * @param publicId The public identifier as a string.
 161      * @see #getPublicId
 162      * @see org.xml.sax.Locator#getPublicId
 163      * @see org.xml.sax.SAXParseException#getPublicId
 164      */
 165     public void setPublicId (String publicId)
 166     {
 167         this.publicId = publicId;
 168     }
 169 
 170 
 171     /**
 172      * Get the public identifier for this input source.
 173      *
 174      * @return The public identifier, or null if none was supplied.
 175      * @see #setPublicId
 176      */
 177     public String getPublicId ()
 178     {
 179         return publicId;
 180     }
 181 
 182 
 183     /**
 184      * Set the system identifier for this input source.
 185      *
 186      * <p>The system identifier is optional if there is a byte stream
 187      * or a character stream, but it is still useful to provide one,
 188      * since the application can use it to resolve relative URIs
 189      * and can include it in error messages and warnings (the parser
 190      * will attempt to open a connection to the URI only if
 191      * there is no byte stream or character stream specified).</p>
 192      *
 193      * <p>If the application knows the character encoding of the
 194      * object pointed to by the system identifier, it can register
 195      * the encoding using the setEncoding method.</p>
 196      *
 197      * <p>If the system identifier is a URL, it must be fully
 198      * resolved (it may not be a relative URL).</p>
 199      *
 200      * @param systemId The system identifier as a string.
 201      * @see #setEncoding
 202      * @see #getSystemId
 203      * @see org.xml.sax.Locator#getSystemId
 204      * @see org.xml.sax.SAXParseException#getSystemId
 205      */
 206     public void setSystemId (String systemId)
 207     {
 208         this.systemId = systemId;
 209     }
 210 
 211 
 212     /**
 213      * Get the system identifier for this input source.
 214      *
 215      * <p>The getEncoding method will return the character encoding
 216      * of the object pointed to, or null if unknown.</p>
 217      *
 218      * <p>If the system ID is a URL, it will be fully resolved.</p>
 219      *
 220      * @return The system identifier, or null if none was supplied.
 221      * @see #setSystemId
 222      * @see #getEncoding
 223      */
 224     public String getSystemId ()
 225     {
 226         return systemId;
 227     }
 228 
 229 
 230     /**
 231      * Set the byte stream for this input source.
 232      *
 233      * <p>The SAX parser will ignore this if there is also a character
 234      * stream specified, but it will use a byte stream in preference
 235      * to opening a URI connection itself.</p>
 236      *
 237      * <p>If the application knows the character encoding of the
 238      * byte stream, it should set it with the setEncoding method.</p>
 239      *
 240      * @param byteStream A byte stream containing an XML document or
 241      *        other entity.
 242      * @see #setEncoding
 243      * @see #getByteStream
 244      * @see #getEncoding
 245      * @see java.io.InputStream
 246      */
 247     public void setByteStream (InputStream byteStream)
 248     {
 249         this.byteStream = byteStream;
 250     }
 251 
 252 
 253     /**
 254      * Get the byte stream for this input source.
 255      *
 256      * <p>The getEncoding method will return the character
 257      * encoding for this byte stream, or null if unknown.</p>
 258      *
 259      * @return The byte stream, or null if none was supplied.
 260      * @see #getEncoding
 261      * @see #setByteStream
 262      */
 263     public InputStream getByteStream ()
 264     {
 265         return byteStream;
 266     }
 267 
 268 
 269     /**
 270      * Set the character encoding, if known.
 271      *
 272      * <p>The encoding must be a string acceptable for an
 273      * XML encoding declaration (see section 4.3.3 of the XML 1.0
 274      * recommendation).</p>
 275      *
 276      * <p>This method has no effect when the application provides a
 277      * character stream.</p>
 278      *
 279      * @param encoding A string describing the character encoding.
 280      * @see #setSystemId
 281      * @see #setByteStream
 282      * @see #getEncoding
 283      */
 284     public void setEncoding (String encoding)
 285     {
 286         this.encoding = encoding;
 287     }
 288 
 289 
 290     /**
 291      * Get the character encoding for a byte stream or URI.
 292      * This value will be ignored when the application provides a
 293      * character stream.
 294      *
 295      * @return The encoding, or null if none was supplied.
 296      * @see #setByteStream
 297      * @see #getSystemId
 298      * @see #getByteStream
 299      */
 300     public String getEncoding ()
 301     {
 302         return encoding;
 303     }
 304 
 305 
 306     /**
 307      * Set the character stream for this input source.
 308      *
 309      * <p>If there is a character stream specified, the SAX parser
 310      * will ignore any byte stream and will not attempt to open
 311      * a URI connection to the system identifier.</p>
 312      *
 313      * @param characterStream The character stream containing the
 314      *        XML document or other entity.
 315      * @see #getCharacterStream
 316      * @see java.io.Reader
 317      */
 318     public void setCharacterStream (Reader characterStream)
 319     {
 320         this.characterStream = characterStream;
 321     }
 322 
 323 
 324     /**
 325      * Get the character stream for this input source.
 326      *
 327      * @return The character stream, or null if none was supplied.
 328      * @see #setCharacterStream
 329      */
 330     public Reader getCharacterStream ()
 331     {
 332         return characterStream;
 333     }
 334 
 335     /**
 336      * Indicates whether the {@code InputSource} object is empty. Empty is
 337      * defined as follows:
 338      * <ul>
 339      * <li>All of the input sources, including the public identifier, system
 340      * identifier, byte stream, and character stream, are {@code null}.
 341      * </li>
 342      * <li>The public identifier and system identifier are  {@code null}, and
 343      * byte and character stream are either  {@code null} or contain no byte
 344      * or character.
 345      * <p>
 346      * Note that this method will reset the byte stream if it is provided, or
 347      * the character stream if the byte stream is not provided.
 348      * </li>
 349      * </ul>
 350      * <p>
 351      * In case of error while checking the byte or character stream, the method
 352      * will return false to allow the XML processor to handle the error.
 353      *
 354      * @return true if the {@code InputSource} object is empty, false otherwise
 355      */
 356     public boolean isEmpty() {
 357         return (publicId == null && systemId == null && isStreamEmpty());
 358     }
 359 
 360     private boolean isStreamEmpty() {
 361         boolean empty = true;
 362         try {
 363             if (byteStream != null) {
 364                 byteStream.reset();
 365                 int bytesRead = byteStream.available();
 366                 if (bytesRead > 0) {
 367                     return false;
 368                 }
 369             }
 370 
 371             if (characterStream != null) {
 372                 characterStream.reset();
 373                 int c = characterStream.read();
 374                 characterStream.reset();
 375                 if (c != -1) {
 376                     return false;
 377                 }
 378             }
 379         } catch (IOException ex) {
 380             //in case of error, return false
 381             return false;
 382         }
 383 
 384         return empty;
 385     }
 386     ////////////////////////////////////////////////////////////////////
 387     // Internal state.
 388     ////////////////////////////////////////////////////////////////////
 389 
 390     private String publicId;
 391     private String systemId;
 392     private InputStream byteStream;
 393     private String encoding;
 394     private Reader characterStream;
 395 
 396 }
 397 
 398 // end of InputSource.java