1 /*
   2  * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 // SAX input source.
  27 // http://www.saxproject.org
  28 // No warranty; no copyright -- use this as you will.
  29 // $Id: InputSource.java,v 1.2 2004/11/03 22:55:32 jsuttor Exp $
  30 
  31 package org.xml.sax;
  32 
  33 import java.io.IOException;
  34 import java.io.Reader;
  35 import java.io.InputStream;
  36 
  37 /**
  38  * A single input source for an XML entity.
  39  *
  40  * <blockquote>
  41  * <em>This module, both source code and documentation, is in the
  42  * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
  43  * See <a href='http://www.saxproject.org'>http://www.saxproject.org</a>
  44  * for further information.
  45  * </blockquote>
  46  *
  47  * <p>This class allows a SAX application to encapsulate information
  48  * about an input source in a single object, which may include
  49  * a public identifier, a system identifier, a byte stream (possibly
  50  * with a specified encoding), and/or a character stream.</p>
  51  *
  52  * <p>There are two places that the application can deliver an
  53  * input source to the parser: as the argument to the Parser.parse
  54  * method, or as the return value of the EntityResolver.resolveEntity
  55  * method.</p>
  56  *
  57  * <p>The SAX parser will use the InputSource object to determine how
  58  * to read XML input.  If there is a character stream available, the
  59  * parser will read that stream directly, disregarding any text
  60  * encoding declaration found in that stream.
  61  * If there is no character stream, but there is
  62  * a byte stream, the parser will use that byte stream, using the
  63  * encoding specified in the InputSource or else (if no encoding is
  64  * specified) autodetecting the character encoding using an algorithm
  65  * such as the one in the XML specification.  If neither a character
  66  * stream nor a
  67  * byte stream is available, the parser will attempt to open a URI
  68  * connection to the resource identified by the system
  69  * identifier.</p>
  70  *
  71  * <p>An InputSource object belongs to the application: the SAX parser
  72  * shall never modify it in any way (it may modify a copy if
  73  * necessary).  However, standard processing of both byte and
  74  * character streams is to close them on as part of end-of-parse cleanup,
  75  * so applications should not attempt to re-use such streams after they
  76  * have been handed to a parser.  </p>
  77  *
  78  * @since 1.4, SAX 1.0
  79  * @author David Megginson
  80  * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
  81  * @see org.xml.sax.EntityResolver#resolveEntity
  82  * @see java.io.InputStream
  83  * @see java.io.Reader
  84  */
  85 public class InputSource {
  86 
  87     /**
  88      * Zero-argument default constructor.
  89      *
  90      * @see #setPublicId
  91      * @see #setSystemId
  92      * @see #setByteStream
  93      * @see #setCharacterStream
  94      * @see #setEncoding
  95      */
  96     public InputSource ()
  97     {
  98     }
  99 
 100 
 101     /**
 102      * Create a new input source with a system identifier.
 103      *
 104      * <p>Applications may use setPublicId to include a
 105      * public identifier as well, or setEncoding to specify
 106      * the character encoding, if known.</p>
 107      *
 108      * <p>If the system identifier is a URL, it must be fully
 109      * resolved (it may not be a relative URL).</p>
 110      *
 111      * @param systemId The system identifier (URI).
 112      * @see #setPublicId
 113      * @see #setSystemId
 114      * @see #setByteStream
 115      * @see #setEncoding
 116      * @see #setCharacterStream
 117      */
 118     public InputSource (String systemId)
 119     {
 120         setSystemId(systemId);
 121     }
 122 
 123 
 124     /**
 125      * Create a new input source with a byte stream.
 126      *
 127      * <p>Application writers should use setSystemId() to provide a base
 128      * for resolving relative URIs, may use setPublicId to include a
 129      * public identifier, and may use setEncoding to specify the object's
 130      * character encoding.</p>
 131      *
 132      * @param byteStream The raw byte stream containing the document.
 133      * @see #setPublicId
 134      * @see #setSystemId
 135      * @see #setEncoding
 136      * @see #setByteStream
 137      * @see #setCharacterStream
 138      */
 139     public InputSource (InputStream byteStream)
 140     {
 141         setByteStream(byteStream);
 142     }
 143 
 144 
 145     /**
 146      * Create a new input source with a character stream.
 147      *
 148      * <p>Application writers should use setSystemId() to provide a base
 149      * for resolving relative URIs, and may use setPublicId to include a
 150      * public identifier.</p>
 151      *
 152      * <p>The character stream shall not include a byte order mark.</p>
 153      *
 154      * @see #setPublicId
 155      * @see #setSystemId
 156      * @see #setByteStream
 157      * @see #setCharacterStream
 158      */
 159     public InputSource (Reader characterStream)
 160     {
 161         setCharacterStream(characterStream);
 162     }
 163 
 164 
 165     /**
 166      * Set the public identifier for this input source.
 167      *
 168      * <p>The public identifier is always optional: if the application
 169      * writer includes one, it will be provided as part of the
 170      * location information.</p>
 171      *
 172      * @param publicId The public identifier as a string.
 173      * @see #getPublicId
 174      * @see org.xml.sax.Locator#getPublicId
 175      * @see org.xml.sax.SAXParseException#getPublicId
 176      */
 177     public void setPublicId (String publicId)
 178     {
 179         this.publicId = publicId;
 180     }
 181 
 182 
 183     /**
 184      * Get the public identifier for this input source.
 185      *
 186      * @return The public identifier, or null if none was supplied.
 187      * @see #setPublicId
 188      */
 189     public String getPublicId ()
 190     {
 191         return publicId;
 192     }
 193 
 194 
 195     /**
 196      * Set the system identifier for this input source.
 197      *
 198      * <p>The system identifier is optional if there is a byte stream
 199      * or a character stream, but it is still useful to provide one,
 200      * since the application can use it to resolve relative URIs
 201      * and can include it in error messages and warnings (the parser
 202      * will attempt to open a connection to the URI only if
 203      * there is no byte stream or character stream specified).</p>
 204      *
 205      * <p>If the application knows the character encoding of the
 206      * object pointed to by the system identifier, it can register
 207      * the encoding using the setEncoding method.</p>
 208      *
 209      * <p>If the system identifier is a URL, it must be fully
 210      * resolved (it may not be a relative URL).</p>
 211      *
 212      * @param systemId The system identifier as a string.
 213      * @see #setEncoding
 214      * @see #getSystemId
 215      * @see org.xml.sax.Locator#getSystemId
 216      * @see org.xml.sax.SAXParseException#getSystemId
 217      */
 218     public void setSystemId (String systemId)
 219     {
 220         this.systemId = systemId;
 221     }
 222 
 223 
 224     /**
 225      * Get the system identifier for this input source.
 226      *
 227      * <p>The getEncoding method will return the character encoding
 228      * of the object pointed to, or null if unknown.</p>
 229      *
 230      * <p>If the system ID is a URL, it will be fully resolved.</p>
 231      *
 232      * @return The system identifier, or null if none was supplied.
 233      * @see #setSystemId
 234      * @see #getEncoding
 235      */
 236     public String getSystemId ()
 237     {
 238         return systemId;
 239     }
 240 
 241 
 242     /**
 243      * Set the byte stream for this input source.
 244      *
 245      * <p>The SAX parser will ignore this if there is also a character
 246      * stream specified, but it will use a byte stream in preference
 247      * to opening a URI connection itself.</p>
 248      *
 249      * <p>If the application knows the character encoding of the
 250      * byte stream, it should set it with the setEncoding method.</p>
 251      *
 252      * @param byteStream A byte stream containing an XML document or
 253      *        other entity.
 254      * @see #setEncoding
 255      * @see #getByteStream
 256      * @see #getEncoding
 257      * @see java.io.InputStream
 258      */
 259     public void setByteStream (InputStream byteStream)
 260     {
 261         this.byteStream = byteStream;
 262     }
 263 
 264 
 265     /**
 266      * Get the byte stream for this input source.
 267      *
 268      * <p>The getEncoding method will return the character
 269      * encoding for this byte stream, or null if unknown.</p>
 270      *
 271      * @return The byte stream, or null if none was supplied.
 272      * @see #getEncoding
 273      * @see #setByteStream
 274      */
 275     public InputStream getByteStream ()
 276     {
 277         return byteStream;
 278     }
 279 
 280 
 281     /**
 282      * Set the character encoding, if known.
 283      *
 284      * <p>The encoding must be a string acceptable for an
 285      * XML encoding declaration (see section 4.3.3 of the XML 1.0
 286      * recommendation).</p>
 287      *
 288      * <p>This method has no effect when the application provides a
 289      * character stream.</p>
 290      *
 291      * @param encoding A string describing the character encoding.
 292      * @see #setSystemId
 293      * @see #setByteStream
 294      * @see #getEncoding
 295      */
 296     public void setEncoding (String encoding)
 297     {
 298         this.encoding = encoding;
 299     }
 300 
 301 
 302     /**
 303      * Get the character encoding for a byte stream or URI.
 304      * This value will be ignored when the application provides a
 305      * character stream.
 306      *
 307      * @return The encoding, or null if none was supplied.
 308      * @see #setByteStream
 309      * @see #getSystemId
 310      * @see #getByteStream
 311      */
 312     public String getEncoding ()
 313     {
 314         return encoding;
 315     }
 316 
 317 
 318     /**
 319      * Set the character stream for this input source.
 320      *
 321      * <p>If there is a character stream specified, the SAX parser
 322      * will ignore any byte stream and will not attempt to open
 323      * a URI connection to the system identifier.</p>
 324      *
 325      * @param characterStream The character stream containing the
 326      *        XML document or other entity.
 327      * @see #getCharacterStream
 328      * @see java.io.Reader
 329      */
 330     public void setCharacterStream (Reader characterStream)
 331     {
 332         this.characterStream = characterStream;
 333     }
 334 
 335 
 336     /**
 337      * Get the character stream for this input source.
 338      *
 339      * @return The character stream, or null if none was supplied.
 340      * @see #setCharacterStream
 341      */
 342     public Reader getCharacterStream ()
 343     {
 344         return characterStream;
 345     }
 346 
 347     /**
 348      * Indicates whether the {@code InputSource} object is empty. Empty is
 349      * defined as follows:
 350      * <ul>
 351      * <li>All of the input sources, including the public identifier, system
 352      * identifier, byte stream, and character stream, are {@code null}.
 353      * </li>
 354      * <li>The public identifier and system identifier are  {@code null}, and
 355      * byte and character stream are either  {@code null} or contain no byte
 356      * or character.
 357      * <p>
 358      * Note that this method will reset the byte stream if it is provided, or
 359      * the character stream if the byte stream is not provided.
 360      * </li>
 361      * </ul>
 362      * <p>
 363      * In case of error while checking the byte or character stream, the method
 364      * will return false to allow the XML processor to handle the error.
 365      *
 366      * @return true if the {@code InputSource} object is empty, false otherwise
 367      */
 368     public boolean isEmpty() {
 369         return (publicId == null && systemId == null && isStreamEmpty());
 370     }
 371 
 372     private boolean isStreamEmpty() {
 373         boolean empty = true;
 374         try {
 375             if (byteStream != null) {
 376                 byteStream.reset();
 377                 int bytesRead = byteStream.available();
 378                 if (bytesRead > 0) {
 379                     return false;
 380                 }
 381             }
 382 
 383             if (characterStream != null) {
 384                 characterStream.reset();
 385                 int c = characterStream.read();
 386                 characterStream.reset();
 387                 if (c != -1) {
 388                     return false;
 389                 }
 390             }
 391         } catch (IOException ex) {
 392             //in case of error, return false
 393             return false;
 394         }
 395 
 396         return empty;
 397     }
 398     ////////////////////////////////////////////////////////////////////
 399     // Internal state.
 400     ////////////////////////////////////////////////////////////////////
 401 
 402     private String publicId;
 403     private String systemId;
 404     private InputStream byteStream;
 405     private String encoding;
 406     private Reader characterStream;
 407 
 408 }
 409 
 410 // end of InputSource.java