< prev index next >

src/jdk.xml.bind/share/classes/com/sun/xml/internal/dtdparser/Resolver.java

Print this page


   1 /*
   2  * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.dtdparser;
  27 
  28 import org.xml.sax.EntityResolver;
  29 import org.xml.sax.InputSource;
  30 
  31 import java.io.File;
  32 import java.io.FileInputStream;
  33 import java.io.IOException;
  34 import java.io.InputStream;
  35 import java.net.URL;
  36 import java.net.URLConnection;
  37 import java.util.Hashtable;

  38 
  39 /**
  40  * This entity resolver class provides a number of utilities which can help
  41  * management of external parsed entities in XML.  These are commonly used
  42  * to hold markup declarations that are to be used as part of a Document
  43  * Type Declaration (DTD), or to hold text marked up with XML.
  44  * <p/>
  45  * <P> Features include: <UL>
  46  * <p/>
  47  * <LI> Static factory methods are provided for constructing SAX InputSource
  48  * objects from Files, URLs, or MIME objects.  This eliminates a class of
  49  * error-prone coding in applications.
  50  * <p/>
  51  * <LI> Character encodings for XML documents are correctly supported: <UL>
  52  * <p/>
  53  * <LI> The encodings defined in the RFCs for MIME content types
  54  * (2046 for general MIME, and 2376 for XML in particular), are
  55  * supported, handling <em>charset=...</em> attributes and accepting
  56  * content types which are known to be safe for use with XML;
  57  * <p/>
  58  * <LI> The character encoding autodetection algorithm identified
  59  * in the XML specification is used, and leverages all of
  60  * the JDK 1.1 (and later) character encoding support.
  61  * <p/>
  62  * <LI> The use of MIME typing may optionally be disabled, forcing the
  63  * use of autodetection, to support web servers which don't correctly
  64  * report MIME types for XML.  For example, they may report text that
  65  * is encoded in EUC-JP as being US-ASCII text, leading to fatal
  66  * errors during parsing.
  67  * <p/>
  68  * <LI> The InputSource objects returned by this class always
  69  * have a <code>java.io.Reader</code> available as the "character
  70  * stream" property.
  71  * <p/>
  72  * </UL>
  73  * <p/>
  74  * <LI> Catalog entries can map public identifiers to Java resources or
  75  * to local URLs.  These are used to reduce network dependencies and loads,
  76  * and will often be used for external DTD components.  For example, packages
  77  * shipping DTD files as resources in JAR files can eliminate network traffic
  78  * when accessing them, and sites may provide local caches of common DTDs.
  79  * Note that no particular catalog syntax is supported by this class, only
  80  * the notion of a set of entries.
  81  * <p/>
  82  * </UL>
  83  * <p/>
  84  * <P> Subclasses can perform tasks such as supporting new URI schemes for
  85  * URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
  86  * MIME entities which are part of a <em>multipart/related</em> group
  87  * (see RFC 2387).  They may also be used to support particular catalog
  88  * syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
  89  * SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
  90  * Public Identifiers" (FPIs).
  91  *
  92  * @author David Brownell
  93  * @author Janet Koenig
  94  * @version 1.3 00/02/24
  95  */
  96 public class Resolver implements EntityResolver {
  97     private boolean ignoringMIME;
  98 
  99     // table mapping public IDs to (local) URIs
 100     private Hashtable id2uri;
 101 
 102     // tables mapping public IDs to resources and classloaders
 103     private Hashtable id2resource;


 109     // idea is to rule out obvious braindamage ("image/jpg")
 110     // not the subtle stuff ("text/html") that might actually
 111     // be (or become) safe.
 112     //
 113     private static final String types [] = {
 114         "application/xml",
 115         "text/xml",
 116         "text/plain",
 117         "text/html", // commonly mis-inferred
 118         "application/x-netcdf", // this is often illegal XML
 119         "content/unknown"
 120     };
 121 
 122     /**
 123      * Constructs a resolver.  The body is empty, so no catalog entries are registered here.
 124      */
 125     public Resolver() {
 126     }
 127 
 128     /**
 129      * Returns an input source, using the MIME type information and URL
 130      * scheme to statically determine the correct character encoding if
 131      * possible and otherwise autodetecting it.  MIME carefully specifies
 132      * the character encoding defaults, and how attributes of the content
 133      * type can change it.  XML further specifies two mandatory encodings
 134      * (UTF-8 and UTF-16), and includes an XML declaration which can be
 135      * used to internally label most documents encoded using US-ASCII
 136      * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and
 137      * more).
 138      * <p/>
 139      * <P> This method can be used to access XML documents which do not
 140      * have URIs (such as servlet input streams, or most JavaMail message
 141      * entities) and to support access methods such as HTTP POST or PUT.
 142      * (URLs normally return content using the GET method.)
 143      * <p/>
 144      * <P> <em> The caller should set the system ID in order for relative URIs
 145      * found in this document to be interpreted correctly.</em> In some cases,
 146      * a custom resolver will need to be used; for example, documents
 147      * may be grouped in a single MIME "multipart/related" bundle, and
 148      * relative URLs would refer to other documents in that bundle.
 149      *
 150      * @param contentType The MIME content type for the source for which
 151      *                    an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
 152      * @param stream      The input byte stream for the input source.
 153      * @param checkType   If true, this verifies that the content type is known
 154      *                    to support XML documents, such as <em>application/xml</em>.
 155      * @param scheme      Unless this is "file", unspecified MIME types
 156      *                    default to US-ASCII.  Files are always autodetected since most
 157      *                    file systems discard character encoding information.
 158      */
 159     public static InputSource createInputSource(String contentType,
 160                                                 InputStream stream,
 161                                                 boolean checkType,
 162                                                 String scheme) throws IOException {
 163         InputSource retval;
 164         String charset = null;
 165 
 166         if (contentType != null) {
 167             int index;
 168 
 169             contentType = contentType.toLowerCase();
 170             index = contentType.indexOf(';');
 171             if (index != -1) {
 172                 String attributes;
 173 
 174                 attributes = contentType.substring(index + 1);
 175                 contentType = contentType.substring(0, index);
 176 
 177                 // use "charset=..." if it's available
 178                 index = attributes.indexOf("charset");
 179                 if (index != -1) {
 180                     attributes = attributes.substring(index + 7);
 181                     // strip out subsequent attributes
 182                     if ((index = attributes.indexOf(';')) != -1)
 183                         attributes = attributes.substring(0, index);
 184                     // find start of value
 185                     if ((index = attributes.indexOf('=')) != -1) {
 186                         attributes = attributes.substring(index + 1);
 187                         // strip out rfc822 comments
 188                         if ((index = attributes.indexOf('(')) != -1)
 189                             attributes = attributes.substring(0, index);


 278         if (File.separatorChar != '/')
 279             path = path.replace(File.separatorChar, '/');
 280         if (!path.startsWith("/"))
 281             path = "/" + path;
 282         if (!path.endsWith("/") && file.isDirectory())
 283             path = path + "/";
 284 
 285         retval.setSystemId("file:" + path);
 286         return retval;
 287     }
 288 
 289 
 290     /**
 291      * <b>SAX:</b>
 292      * Resolve the given entity into an input source.  If the name can't
 293      * be mapped to a preferred form of the entity, the URI is used.  To
 294      * resolve the entity, first a local catalog mapping names to URIs is
 295      * consulted.  If no mapping is found there, a catalog mapping names
 296      * to java resources is consulted.  Finally, if neither mapping found
 297      * a copy of the entity, the specified URI is used.
 298      * <p/>
 299      * <P> When a URI is used, <a href="#createInputSource">
 300      * createInputSource</a> is used to correctly deduce the character
 301      * encoding used by this entity.  No MIME type checking is done.
 302      *
 303      * @param name Used to find alternate copies of the entity, when
 304      *             this value is non-null; this is the XML "public ID".
 305      * @param uri  Used when no alternate copy of the entity is found;
 306      *             this is the XML "system ID", normally a URI.
 307      */

 308     public InputSource resolveEntity(String name, String uri)
 309             throws IOException {
 310         InputSource retval;
 311         String mappedURI = name2uri(name);
 312         InputStream stream;
 313 
 314         // prefer explicit URI mappings, then bundled resources...
 315         if (mappedURI == null && (stream = mapResource(name)) != null) {
 316             uri = "java:resource:" + (String) id2resource.get(name);
 317             retval = new InputSource(XmlReader.createReader(stream));
 318 
 319             // ...and treat all URIs the same (as URLs for now).
 320         } else {
 321             URL url;
 322             URLConnection conn;
 323 
 324             if (mappedURI != null)
 325                 uri = mappedURI;
 326             else if (uri == null)
 327                 return null;
 328 
 329             url = new URL(uri);
 330             conn = url.openConnection();
 331             uri = conn.getURL().toString();
 332             // System.out.println ("++ URI: " + url);
 333             if (ignoringMIME)
 334                 retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
 335             else {


 406 
 407         if (resourceName == null)
 408             return null;
 409         // System.out.println ("++ Resource: " + resourceName);
 410 
 411         if (id2loader != null)
 412             loader = (ClassLoader) id2loader.get(publicId);
 413         // System.out.println ("++ Loader: " + loader);
 414         if (loader == null)
 415             return ClassLoader.getSystemResourceAsStream(resourceName);
 416         return loader.getResourceAsStream(resourceName);
 417     }
 418 
 419     /**
 420      * Registers a given public ID as corresponding to a particular Java
 421      * resource in a given class loader, typically distributed with a
 422      * software package.  This resource will be preferred over system IDs
 423      * included in XML documents.  This mechanism should most typically be
 424      * used for Document Type Definitions (DTDs), where the public IDs are
 425      * formally managed and versioned.
 426      * <p/>
 427      * <P> If a mapping to a URI has been provided, that mapping takes
 428      * precedence over this one.
 429      *
 430      * @param publicId     The managed public ID being mapped
 431      * @param resourceName The name of the Java resource
 432      * @param loader       The class loader holding the resource, or null if
 433      *                     it is a system resource.
 434      */
 435     public void registerCatalogEntry(String publicId,
 436                                      String resourceName,
 437                                      ClassLoader loader) {
 438         if (id2resource == null)
 439             id2resource = new Hashtable(17);
 440         id2resource.put(publicId, resourceName);
 441 
 442         if (loader != null) {
 443             if (id2loader == null)
 444                 id2loader = new Hashtable(17);
 445             id2loader.put(publicId, loader);
 446         }
   1 /*
   2  * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.dtdparser;
  27 
  28 import org.xml.sax.EntityResolver;
  29 import org.xml.sax.InputSource;
  30 
  31 import java.io.File;
  32 import java.io.FileInputStream;
  33 import java.io.IOException;
  34 import java.io.InputStream;
  35 import java.net.URL;
  36 import java.net.URLConnection;
  37 import java.util.Hashtable;
  38 import java.util.Locale;
  39 
  40 /**
  41  * This entity resolver class provides a number of utilities which can help
  42  * management of external parsed entities in XML.  These are commonly used
  43  * to hold markup declarations that are to be used as part of a Document
  44  * Type Declaration (DTD), or to hold text marked up with XML.
  45  * <p>
  46  * <P> Features include: <UL>
  47  *
  48  * <LI> Static factory methods are provided for constructing SAX InputSource
  49  * objects from Files, URLs, or MIME objects.  This eliminates a class of
  50  * error-prone coding in applications.</LI>
  51  *
  52  * <LI> Character encodings for XML documents are correctly supported:<UL>
  53  *
  54  * <LI> The encodings defined in the RFCs for MIME content types
  55  * (2046 for general MIME, and 2376 for XML in particular), are
  56  * supported, handling <em>charset=...</em> attributes and accepting
  57  * content types which are known to be safe for use with XML;</LI>
  58  *
  59  * <LI> The character encoding autodetection algorithm identified
  60  * in the XML specification is used, and leverages all of
  61  * the JDK 1.1 (and later) character encoding support.</LI>
  62  *
  63  * <LI> The use of MIME typing may optionally be disabled, forcing the
  64  * use of autodetection, to support web servers which don't correctly
  65  * report MIME types for XML.  For example, they may report text that
  66  * is encoded in EUC-JP as being US-ASCII text, leading to fatal
  67  * errors during parsing.</LI>
  68  *
  69  * <LI> The InputSource objects returned by this class always
  70  * have a <code>java.io.Reader</code> available as the "character
  71  * stream" property.</LI>
  72  *
  73  * </UL></LI>
  74  *
  75  * <LI> Catalog entries can map public identifiers to Java resources or
  76  * to local URLs.  These are used to reduce network dependencies and loads,
  77  * and will often be used for external DTD components.  For example, packages
  78  * shipping DTD files as resources in JAR files can eliminate network traffic
  79  * when accessing them, and sites may provide local caches of common DTDs.
  80  * Note that no particular catalog syntax is supported by this class, only
  81  * the notion of a set of entries.</LI>
  82  *
  83  * </UL>
  84  * <p>
  85  * <P> Subclasses can perform tasks such as supporting new URI schemes for
  86  * URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
  87  * MIME entities which are part of a <em>multipart/related</em> group
  88  * (see RFC 2387).  They may also be used to support particular catalog
  89  * syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
  90  * SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
  91  * Public Identifiers" (FPIs).
  92  *
  93  * @author David Brownell
  94  * @author Janet Koenig
  95  * @version 1.3 00/02/24
  96  */
  97 public class Resolver implements EntityResolver {
  98     private boolean ignoringMIME;
  99 
 100     // table mapping public IDs to (local) URIs
 101     private Hashtable id2uri;
 102 
 103     // tables mapping public IDs to resources and classloaders
 104     private Hashtable id2resource;


 110     // idea is to rule out obvious braindamage ("image/jpg")
 111     // not the subtle stuff ("text/html") that might actually
 112     // be (or become) safe.
 113     //
 114     private static final String types [] = {
 115         "application/xml",
 116         "text/xml",
 117         "text/plain",
 118         "text/html", // commonly mis-inferred
 119         "application/x-netcdf", // this is often illegal XML
 120         "content/unknown"
 121     };
 122 
 123     /**
 124      * Constructs a resolver.  The body is empty, so no catalog entries are registered here.
 125      */
 126     public Resolver() {
 127     }
 128 
 129     /**
 130      * <p>Returns an input source, using the MIME type information and URL
 131      * scheme to statically determine the correct character encoding if
 132      * possible and otherwise autodetecting it.  MIME carefully specifies
 133      * the character encoding defaults, and how attributes of the content
 134      * type can change it.  XML further specifies two mandatory encodings
 135      * (UTF-8 and UTF-16), and includes an XML declaration which can be
 136      * used to internally label most documents encoded using US-ASCII
 137      * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and
 138      * more).</p>
 139      *
 140      * <p> This method can be used to access XML documents which do not
 141      * have URIs (such as servlet input streams, or most JavaMail message
 142      * entities) and to support access methods such as HTTP POST or PUT.
 143      * (URLs normally return content using the GET method.)</p>
 144      *
 145      * <p> <em> The caller should set the system ID in order for relative URIs
 146      * found in this document to be interpreted correctly.</em> In some cases,
 147      * a custom resolver will need to be used; for example, documents
 148      * may be grouped in a single MIME "multipart/related" bundle, and
 149      * relative URLs would refer to other documents in that bundle.</p>
 150      *
 151      * @param contentType The MIME content type for the source for which
 152      *                    an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
 153      * @param stream      The input byte stream for the input source.
 154      * @param checkType   If true, this verifies that the content type is known
 155      *                    to support XML documents, such as <em>application/xml</em>.
 156      * @param scheme      Unless this is "file", unspecified MIME types
 157      *                    default to US-ASCII.  Files are always autodetected since most
 158      *                    file systems discard character encoding information.
 159      */
 160     public static InputSource createInputSource(String contentType,
 161                                                 InputStream stream,
 162                                                 boolean checkType,
 163                                                 String scheme) throws IOException {
 164         InputSource retval;
 165         String charset = null;
 166 
 167         if (contentType != null) {
 168             int index;
 169 
 170             contentType = contentType.toLowerCase(Locale.ENGLISH);
 171             index = contentType.indexOf(';');
 172             if (index != -1) {
 173                 String attributes;
 174 
 175                 attributes = contentType.substring(index + 1);
 176                 contentType = contentType.substring(0, index);
 177 
 178                 // use "charset=..." if it's available
 179                 index = attributes.indexOf("charset");
 180                 if (index != -1) {
 181                     attributes = attributes.substring(index + 7);
 182                     // strip out subsequent attributes
 183                     if ((index = attributes.indexOf(';')) != -1)
 184                         attributes = attributes.substring(0, index);
 185                     // find start of value
 186                     if ((index = attributes.indexOf('=')) != -1) {
 187                         attributes = attributes.substring(index + 1);
 188                         // strip out rfc822 comments
 189                         if ((index = attributes.indexOf('(')) != -1)
 190                             attributes = attributes.substring(0, index);


 279         if (File.separatorChar != '/')
 280             path = path.replace(File.separatorChar, '/');
 281         if (!path.startsWith("/"))
 282             path = "/" + path;
 283         if (!path.endsWith("/") && file.isDirectory())
 284             path = path + "/";
 285 
 286         retval.setSystemId("file:" + path);
 287         return retval;
 288     }
 289 
 290 
 291     /**
 292      * <b>SAX:</b>
 293      * Resolve the given entity into an input source.  If the name can't
 294      * be mapped to a preferred form of the entity, the URI is used.  To
 295      * resolve the entity, first a local catalog mapping names to URIs is
 296      * consulted.  If no mapping is found there, a catalog mapping names
 297      * to java resources is consulted.  Finally, if neither mapping found
 298      * a copy of the entity, the specified URI is used.
 299      * <p>
 300      * <P> When a URI is used, <a href="#createInputSource">
 301      * createInputSource</a> is used to correctly deduce the character
 302      * encoding used by this entity.  No MIME type checking is done.
 303      *
 304      * @param name Used to find alternate copies of the entity, when
 305      *             this value is non-null; this is the XML "public ID".
 306      * @param uri  Used when no alternate copy of the entity is found;
 307      *             this is the XML "system ID", normally a URI.
 308      */
 309     @Override
 310     public InputSource resolveEntity(String name, String uri)
 311             throws IOException {
 312         InputSource retval;
 313         String mappedURI = name2uri(name);
 314         InputStream stream;
 315 
 316         // prefer explicit URI mappings, then bundled resources...
 317         if (mappedURI == null && (stream = mapResource(name)) != null && id2resource != null) {
 318             uri = "java:resource:" + (String) id2resource.get(name);
 319             retval = new InputSource(XmlReader.createReader(stream));
 320 
 321             // ...and treat all URIs the same (as URLs for now).
 322         } else {
 323             URL url;
 324             URLConnection conn;
 325 
 326             if (mappedURI != null)
 327                 uri = mappedURI;
 328             else if (uri == null)
 329                 return null;
 330 
 331             url = new URL(uri);
 332             conn = url.openConnection();
 333             uri = conn.getURL().toString();
 334             // System.out.println ("++ URI: " + url);
 335             if (ignoringMIME)
 336                 retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
 337             else {


 408 
 409         if (resourceName == null)
 410             return null;
 411         // System.out.println ("++ Resource: " + resourceName);
 412 
 413         if (id2loader != null)
 414             loader = (ClassLoader) id2loader.get(publicId);
 415         // System.out.println ("++ Loader: " + loader);
 416         if (loader == null)
 417             return ClassLoader.getSystemResourceAsStream(resourceName);
 418         return loader.getResourceAsStream(resourceName);
 419     }
 420 
 421     /**
 422      * Registers a given public ID as corresponding to a particular Java
 423      * resource in a given class loader, typically distributed with a
 424      * software package.  This resource will be preferred over system IDs
 425      * included in XML documents.  This mechanism should most typically be
 426      * used for Document Type Definitions (DTDs), where the public IDs are
 427      * formally managed and versioned.
 428      * <p>
 429      * <P> If a mapping to a URI has been provided, that mapping takes
 430      * precedence over this one.
 431      *
 432      * @param publicId     The managed public ID being mapped
 433      * @param resourceName The name of the Java resource
 434      * @param loader       The class loader holding the resource, or null if
 435      *                     it is a system resource.
 436      */
 437     public void registerCatalogEntry(String publicId,
 438                                      String resourceName,
 439                                      ClassLoader loader) {
 440         if (id2resource == null)
 441             id2resource = new Hashtable(17);
 442         id2resource.put(publicId, resourceName);
 443 
 444         if (loader != null) {
 445             if (id2loader == null)
 446                 id2loader = new Hashtable(17);
 447             id2loader.put(publicId, loader);
 448         }
< prev index next >