1 /*
   2  * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.util;
  22 
  23 import java.io.IOException;
  24 
  25 import org.xml.sax.InputSource;
  26 import org.xml.sax.SAXException;
  27 import org.xml.sax.ext.EntityResolver2;
  28 
  29 import org.w3c.dom.ls.LSInput;
  30 import org.w3c.dom.ls.LSResourceResolver;
  31 
  32 import javax.xml.parsers.SAXParserFactory;
  33 
  34 import com.sun.org.apache.xerces.internal.dom.DOMInputImpl;
  35 import com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl;
  36 
  37 import com.sun.org.apache.xerces.internal.xni.XNIException;
  38 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
  39 
  40 import com.sun.org.apache.xerces.internal.xni.parser.XMLEntityResolver;
  41 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;
  42 
  43 import com.sun.org.apache.xml.internal.resolver.Catalog;
  44 import com.sun.org.apache.xml.internal.resolver.CatalogManager;
  45 import com.sun.org.apache.xml.internal.resolver.readers.OASISXMLCatalogReader;
  46 import com.sun.org.apache.xml.internal.resolver.readers.SAXCatalogReader;
  47 import jdk.xml.internal.JdkXmlUtils;
  48 
  49 /**
  50  * <p>The catalog resolver handles the resolution of external
  51  * identifiers and URI references through XML catalogs. This
  52  * component supports XML catalogs defined by the
  53  * <a href="http://www.oasis-open.org/committees/entity/spec.html">
  54  * OASIS XML Catalogs Specification</a>. It encapsulates the
  55  * <a href="http://xml.apache.org/commons/">XML Commons</a> resolver.
  56  * An instance of this class may be registered on the parser
  57  * as a SAX entity resolver, as a DOM LSResourceResolver or
  58  * as an XNI entity resolver by setting the property
  59  * (http://apache.org/xml/properties/internal/entity-resolver).</p>
  60  *
  61  * <p>It is intended that this class may be used standalone to perform
  62  * catalog resolution outside of a parsing context. It may be shared
  63  * between several parsers and the application.</p>
  64  *
  65  * @author Michael Glavassevich, IBM
  66  *
  67  */
  68 public class XMLCatalogResolver
  69     implements XMLEntityResolver, EntityResolver2, LSResourceResolver {
  70 
  71     /** Internal catalog manager for Apache catalogs. **/
  72     private CatalogManager fResolverCatalogManager = null;
  73 
  74     /** Internal catalog structure. **/
  75     private Catalog fCatalog = null;
  76 
  77     /** An array of catalog URIs. **/
  78     private String [] fCatalogsList = null;
  79 
  80     /**
  81      * Indicates whether the list of catalogs has
  82      * changed since it was processed.
  83      */
  84     private boolean fCatalogsChanged = true;
  85 
  86     /** Application specified prefer public setting. **/
  87     private boolean fPreferPublic = true;
  88 
  89     /**
  90      * Indicates whether the application desires that
  91      * the parser or some other component performing catalog
  92      * resolution should use the literal system identifier
  93      * instead of the expanded system identifier.
  94      */
  95     private boolean fUseLiteralSystemId = true;
  96 
  97     /**
  98      * <p>Constructs a catalog resolver with a default configuration.</p>
  99      */
 100     public XMLCatalogResolver () {
 101         this(null, true);
 102     }
 103 
 104     /**
 105      * <p>Constructs a catalog resolver with the given
 106      * list of entry files.</p>
 107      *
 108      * @param catalogs an ordered array list of absolute URIs
 109      */
 110     public XMLCatalogResolver (String [] catalogs) {
 111         this(catalogs, true);
 112     }
 113 
 114     /**
 115      * <p>Constructs a catalog resolver with the given
 116      * list of entry files and the preference for whether
 117      * system or public matches are preferred.</p>
 118      *
 119      * @param catalogs an ordered array list of absolute URIs
 120      * @param preferPublic the prefer public setting
 121      */
 122     public XMLCatalogResolver (String [] catalogs, boolean preferPublic) {
 123         init(catalogs, preferPublic);
 124     }
 125 
 126     /**
 127      * <p>Returns the initial list of catalog entry files.</p>
 128      *
 129      * @return the initial list of catalog entry files
 130      */
 131     public final synchronized String [] getCatalogList () {
 132         return (fCatalogsList != null)
 133             ? (String[]) fCatalogsList.clone() : null;
 134     }
 135 
 136     /**
 137      * <p>Sets the initial list of catalog entry files.
 138      * If there were any catalog mappings cached from
 139      * the previous list they will be replaced by catalog
 140      * mappings from the new list the next time the catalog
 141      * is queried.</p>
 142      *
 143      * @param catalogs an ordered array list of absolute URIs
 144      */
 145     public final synchronized void setCatalogList (String [] catalogs) {
 146         fCatalogsChanged = true;
 147         fCatalogsList = (catalogs != null)
 148             ? (String[]) catalogs.clone() : null;
 149     }
 150 
 151     /**
 152      * <p>Forces the cache of catalog mappings to be cleared.</p>
 153      */
 154     public final synchronized void clear () {
 155         fCatalog = null;
 156     }
 157 
 158     /**
 159      * <p>Returns the preference for whether system or public
 160      * matches are preferred. This is used in the absence
 161      * of any occurence of the <code>prefer</code> attribute
 162      * on the <code>catalog</code> entry of a catalog. If this
 163      * property has not yet been explicitly set its value is
 164      * <code>true</code>.</p>
 165      *
 166      * @return the prefer public setting
 167      */
 168     public final boolean getPreferPublic () {
 169         return fPreferPublic;
 170     }
 171 
 172     /**
 173      * <p>Sets the preference for whether system or public
 174      * matches are preferred. This is used in the absence
 175      * of any occurence of the <code>prefer</code> attribute
 176      * on the <code>catalog</code> entry of a catalog.</p>
 177      *
 178      * @param preferPublic the prefer public setting
 179      */
 180     public final void setPreferPublic (boolean preferPublic) {
 181         fPreferPublic = preferPublic;
 182         fResolverCatalogManager.setPreferPublic(preferPublic);
 183     }
 184 
 185     /**
 186      * <p>Returns the preference for whether the literal system
 187      * identifier should be used when resolving system
 188      * identifiers when both it and the expanded system
 189      * identifier are available. If this property has not yet
 190      * been explicitly set its value is <code>true</code>.</p>
 191      *
 192      * @return the preference for using literal system identifers
 193      * for catalog resolution
 194      *
 195      * @see #setUseLiteralSystemId
 196      */
 197     public final boolean getUseLiteralSystemId () {
 198         return fUseLiteralSystemId;
 199     }
 200 
 201     /**
 202      * <p>Sets the preference for whether the literal system
 203      * identifier should be used when resolving system
 204      * identifiers when both it and the expanded system
 205      * identifier are available.</p>
 206      *
 207      * <p>The literal system identifier is the URI as it was
 208      * provided before absolutization. It may be embedded within
 209      * an entity. It may be provided externally or it may be the
 210      * result of redirection. For example, redirection may
 211      * have come from the protocol level through HTTP or from
 212      * an application's entity resolver.</p>
 213      *
 214      * <p>The expanded system identifier is an absolute URI
 215      * which is the result of resolving the literal system
 216      * identifier against a base URI.</p>
 217      *
 218      * @param useLiteralSystemId the preference for using
 219      * literal system identifers for catalog resolution
 220      */
 221     public final void setUseLiteralSystemId (boolean useLiteralSystemId) {
 222         fUseLiteralSystemId = useLiteralSystemId;
 223     }
 224 
 225     /**
 226      * <p>Resolves an external entity. If the entity cannot be
 227      * resolved, this method should return <code>null</code>. This
 228      * method returns an input source if an entry was found in the
 229      * catalog for the given external identifier. It should be
 230      * overrided if other behaviour is required.</p>
 231      *
 232      * @param publicId the public identifier, or <code>null</code> if none was supplied
 233      * @param systemId the system identifier
 234      *
 235      * @throws SAXException any SAX exception, possibly wrapping another exception
 236      * @throws IOException thrown if some i/o error occurs
 237      */
 238     public InputSource resolveEntity(String publicId, String systemId)
 239          throws SAXException, IOException {
 240 
 241         String resolvedId = null;
 242         if (publicId != null && systemId != null) {
 243             resolvedId = resolvePublic(publicId, systemId);
 244         }
 245         else if (systemId != null) {
 246             resolvedId = resolveSystem(systemId);
 247         }
 248 
 249         if (resolvedId != null) {
 250             InputSource source = new InputSource(resolvedId);
 251             source.setPublicId(publicId);
 252             return source;
 253         }
 254         return null;
 255     }
 256 
 257      /**
 258       * <p>Resolves an external entity. If the entity cannot be
 259       * resolved, this method should return <code>null</code>. This
 260       * method returns an input source if an entry was found in the
 261       * catalog for the given external identifier. It should be
 262       * overrided if other behaviour is required.</p>
 263       *
 264       * @param name the identifier of the external entity
 265       * @param publicId the public identifier, or <code>null</code> if none was supplied
 266       * @param baseURI the URI with respect to which relative systemIDs are interpreted.
 267       * @param systemId the system identifier
 268       *
 269       * @throws SAXException any SAX exception, possibly wrapping another exception
 270       * @throws IOException thrown if some i/o error occurs
 271       */
 272      public InputSource resolveEntity(String name, String publicId,
 273          String baseURI, String systemId) throws SAXException, IOException {
 274 
 275          String resolvedId = null;
 276 
 277          if (!getUseLiteralSystemId() && baseURI != null) {
 278              // Attempt to resolve the system identifier against the base URI.
 279              try {
 280                  URI uri = new URI(new URI(baseURI), systemId);
 281                  systemId = uri.toString();
 282              }
 283              // Ignore the exception. Fallback to the literal system identifier.
 284              catch (URI.MalformedURIException ex) {}
 285          }
 286 
 287          if (publicId != null && systemId != null) {
 288              resolvedId = resolvePublic(publicId, systemId);
 289          }
 290          else if (systemId != null) {
 291              resolvedId = resolveSystem(systemId);
 292          }
 293 
 294          if (resolvedId != null) {
 295              InputSource source = new InputSource(resolvedId);
 296              source.setPublicId(publicId);
 297              return source;
 298          }
 299          return null;
 300     }
 301 
 302      /**
 303       * <p>Locates an external subset for documents which do not explicitly
 304       * provide one. This method always returns <code>null</code>. It
 305       * should be overrided if other behaviour is required.</p>
 306       *
 307       * @param name the identifier of the document root element
 308       * @param baseURI the document's base URI
 309       *
 310       * @throws SAXException any SAX exception, possibly wrapping another exception
 311       * @throws IOException thrown if some i/o error occurs
 312       */
 313      public InputSource getExternalSubset(String name, String baseURI)
 314          throws SAXException, IOException {
 315          return null;
 316      }
 317 
 318     /**
 319      * <p>Resolves a resource using the catalog. This method interprets that
 320      * the namespace URI corresponds to uri entries in the catalog.
 321      * Where both a namespace and an external identifier exist, the namespace
 322      * takes precedence.</p>
 323      *
 324      * @param type the type of the resource being resolved
 325      * @param namespaceURI the namespace of the resource being resolved,
 326      * or <code>null</code> if none was supplied
 327      * @param publicId the public identifier of the resource being resolved,
 328      * or <code>null</code> if none was supplied
 329      * @param systemId the system identifier of the resource being resolved,
 330      * or <code>null</code> if none was supplied
 331      * @param baseURI the absolute base URI of the resource being parsed,
 332      * or <code>null</code> if there is no base URI
 333      */
 334     public LSInput resolveResource(String type, String namespaceURI,
 335         String publicId, String systemId, String baseURI) {
 336 
 337         String resolvedId = null;
 338 
 339         try {
 340             // The namespace is useful for resolving namespace aware
 341             // grammars such as XML schema. Let it take precedence over
 342             // the external identifier if one exists.
 343             if (namespaceURI != null) {
 344                 resolvedId = resolveURI(namespaceURI);
 345             }
 346 
 347             if (!getUseLiteralSystemId() && baseURI != null) {
 348                 // Attempt to resolve the system identifier against the base URI.
 349                 try {
 350                     URI uri = new URI(new URI(baseURI), systemId);
 351                     systemId = uri.toString();
 352                 }
 353                 // Ignore the exception. Fallback to the literal system identifier.
 354                 catch (URI.MalformedURIException ex) {}
 355             }
 356 
 357             // Resolve against an external identifier if one exists. This
 358             // is useful for resolving DTD external subsets and other
 359             // external entities. For XML schemas if there was no namespace
 360             // mapping we might be able to resolve a system identifier
 361             // specified as a location hint.
 362             if (resolvedId == null) {
 363                 if (publicId != null && systemId != null) {
 364                     resolvedId = resolvePublic(publicId, systemId);
 365                 }
 366                 else if (systemId != null) {
 367                     resolvedId = resolveSystem(systemId);
 368                 }
 369             }
 370         }
 371         // Ignore IOException. It cannot be thrown from this method.
 372         catch (IOException ex) {}
 373 
 374         if (resolvedId != null) {
 375             return new DOMInputImpl(publicId, resolvedId, baseURI);
 376         }
 377         return null;
 378     }
 379 
 380 
 381     /**
 382      * <p>Resolves an external entity. If the entity cannot be
 383      * resolved, this method should return <code>null</code>. This
 384      * method only calls <code>resolveIdentifier</code> and returns
 385      * an input source if an entry was found in the catalog. It
 386      * should be overrided if other behaviour is required.</p>
 387      *
 388      * @param resourceIdentifier location of the XML resource to resolve
 389      *
 390      * @throws XNIException thrown on general error
 391      * @throws IOException thrown if some i/o error occurs
 392      */
 393     public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier)
 394         throws XNIException, IOException {
 395 
 396         String resolvedId = resolveIdentifier(resourceIdentifier);
 397         if (resolvedId != null) {
 398             return new XMLInputSource(resourceIdentifier.getPublicId(),
 399                                       resolvedId,
 400                                       resourceIdentifier.getBaseSystemId());
 401         }
 402         return null;
 403     }
 404 
 405     /**
 406      * <p>Resolves an identifier using the catalog. This method interprets that
 407      * the namespace of the identifier corresponds to uri entries in the catalog.
 408      * Where both a namespace and an external identifier exist, the namespace
 409      * takes precedence.</p>
 410      *
 411      * @param resourceIdentifier the identifier to resolve
 412      *
 413      * @throws XNIException thrown on general error
 414      * @throws IOException thrown if some i/o error occurs
 415      */
 416     public String resolveIdentifier(XMLResourceIdentifier resourceIdentifier)
 417         throws IOException, XNIException {
 418 
 419         String resolvedId = null;
 420 
 421         // The namespace is useful for resolving namespace aware
 422         // grammars such as XML schema. Let it take precedence over
 423         // the external identifier if one exists.
 424         String namespace = resourceIdentifier.getNamespace();
 425         if (namespace != null) {
 426             resolvedId = resolveURI(namespace);
 427         }
 428 
 429         // Resolve against an external identifier if one exists. This
 430         // is useful for resolving DTD external subsets and other
 431         // external entities. For XML schemas if there was no namespace
 432         // mapping we might be able to resolve a system identifier
 433         // specified as a location hint.
 434         if (resolvedId == null) {
 435             String publicId = resourceIdentifier.getPublicId();
 436             String systemId = getUseLiteralSystemId()
 437                 ? resourceIdentifier.getLiteralSystemId()
 438                 : resourceIdentifier.getExpandedSystemId();
 439             if (publicId != null && systemId != null) {
 440                 resolvedId = resolvePublic(publicId, systemId);
 441             }
 442             else if (systemId != null) {
 443                 resolvedId = resolveSystem(systemId);
 444             }
 445         }
 446         return resolvedId;
 447     }
 448 
 449     /**
 450      * <p>Returns the URI mapping in the catalog for the given
 451      * external identifier or <code>null</code> if no mapping
 452      * exists. If the system identifier is an URN in the
 453      * <code>publicid</code> namespace it is converted into
 454      * a public identifier by URN "unwrapping" as specified
 455      * in the XML Catalogs specification.</p>
 456      *
 457      * @param systemId the system identifier to locate in the catalog
 458      *
 459      * @return the mapped URI or <code>null</code> if no mapping
 460      * was found in the catalog
 461      *
 462      * @throws IOException if an i/o error occurred while reading
 463      * the catalog
 464      */
 465     public final synchronized String resolveSystem (String systemId)
 466         throws IOException {
 467 
 468         if (fCatalogsChanged) {
 469             parseCatalogs();
 470             fCatalogsChanged = false;
 471         }
 472         return (fCatalog != null)
 473             ? fCatalog.resolveSystem(systemId) : null;
 474     }
 475 
 476     /**
 477      * <p>Returns the URI mapping in the catalog for the given
 478      * external identifier or <code>null</code> if no mapping
 479      * exists. Public identifiers are normalized before
 480      * comparison.</p>
 481      *
 482      * @param publicId the public identifier to locate in the catalog
 483      * @param systemId the system identifier to locate in the catalog
 484      *
 485      * @return the mapped URI or <code>null</code> if no mapping
 486      * was found in the catalog
 487      *
 488      * @throws IOException if an i/o error occurred while reading
 489      * the catalog
 490      */
 491     public final synchronized String resolvePublic (String publicId, String systemId)
 492         throws IOException {
 493 
 494         if (fCatalogsChanged) {
 495             parseCatalogs();
 496             fCatalogsChanged = false;
 497         }
 498         return (fCatalog != null)
 499             ? fCatalog.resolvePublic(publicId, systemId) : null;
 500     }
 501 
 502     /**
 503      * <p>Returns the URI mapping in the catalog for the given URI
 504      * reference or <code>null</code> if no mapping exists.
 505      * URI comparison is case sensitive. If the URI reference
 506      * is an URN in the <code>publicid</code> namespace
 507      * it is converted into a public identifier by URN "unwrapping"
 508      * as specified in the XML Catalogs specification and then
 509      * resolution is performed following the semantics of
 510      * external identifier resolution.</p>
 511      *
 512      * @param uri the URI to locate in the catalog
 513      *
 514      * @return the mapped URI or <code>null</code> if no mapping
 515      * was found in the catalog
 516      *
 517      * @throws IOException if an i/o error occurred while reading
 518      * the catalog
 519      */
 520     public final synchronized String resolveURI (String uri)
 521         throws IOException {
 522 
 523         if (fCatalogsChanged) {
 524             parseCatalogs();
 525             fCatalogsChanged = false;
 526         }
 527         return (fCatalog != null)
 528             ? fCatalog.resolveURI(uri) : null;
 529     }
 530 
 531     /**
 532      * Initialization. Create a CatalogManager and set all
 533      * the properties upfront. This prevents JVM wide system properties
 534      * or a property file somewhere in the environment from affecting
 535      * the behaviour of this catalog resolver.
 536      */
 537     private void init (String [] catalogs, boolean preferPublic) {
 538         fCatalogsList = (catalogs != null) ? (String[]) catalogs.clone() : null;
 539         fPreferPublic = preferPublic;
 540         fResolverCatalogManager = new CatalogManager();
 541         fResolverCatalogManager.setAllowOasisXMLCatalogPI(false);
 542         fResolverCatalogManager.setCatalogClassName("com.sun.org.apache.xml.internal.resolver.Catalog");
 543         fResolverCatalogManager.setCatalogFiles("");
 544         fResolverCatalogManager.setIgnoreMissingProperties(true);
 545         fResolverCatalogManager.setPreferPublic(fPreferPublic);
 546         fResolverCatalogManager.setRelativeCatalogs(false);
 547         fResolverCatalogManager.setUseStaticCatalog(false);
 548         fResolverCatalogManager.setVerbosity(0);
 549     }
 550 
 551     /**
 552      * Instruct the <code>Catalog</code> to parse each of the
 553      * catalogs in the list. Only the first catalog will actually be
 554      * parsed immediately. The others will be queued and read if
 555      * they are needed later.
 556      */
 557     private void parseCatalogs () throws IOException {
 558         if (fCatalogsList != null) {
 559             fCatalog = new Catalog(fResolverCatalogManager);
 560             attachReaderToCatalog(fCatalog);
 561             for (int i = 0; i < fCatalogsList.length; ++i) {
 562                 String catalog = fCatalogsList[i];
 563                 if (catalog != null && catalog.length() > 0) {
 564                     fCatalog.parseCatalog(catalog);
 565                 }
 566             }
 567         }
 568         else {
 569             fCatalog = null;
 570         }
 571     }
 572 
 573     /**
 574      * Attaches the reader to the catalog.
 575      */
 576     private void attachReaderToCatalog (Catalog catalog) {
 577 
 578         SAXParserFactory spf = JdkXmlUtils.getSAXFactory(
 579                 catalog.getCatalogManager().overrideDefaultParser());
 580         spf.setValidating(false);
 581 
 582         SAXCatalogReader saxReader = new SAXCatalogReader(spf);
 583         saxReader.setCatalogParser(OASISXMLCatalogReader.namespaceName, "catalog",
 584             "com.sun.org.apache.xml.internal.resolver.readers.OASISXMLCatalogReader");
 585         catalog.addReader("application/xml", saxReader);
 586     }
 587 }