1 /*
   2  * Licensed to the Apache Software Foundation (ASF) under one or more
   3  * contributor license agreements.  See the NOTICE file distributed with
   4  * this work for additional information regarding copyright ownership.
   5  * The ASF licenses this file to You under the Apache License, Version 2.0
   6  * (the "License"); you may not use this file except in compliance with
   7  * the License.  You may obtain a copy of the License at
   8  *
   9  *      http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17 
  18 package com.sun.org.apache.xml.internal.resolver.tools;
  19 
  20 import java.io.IOException;
  21 import java.io.InputStream;
  22 import java.net.MalformedURLException;
  23 import java.net.URL;
  24 
  25 import org.xml.sax.InputSource;
  26 import org.xml.sax.SAXException;
  27 import org.xml.sax.XMLReader;
  28 import org.xml.sax.Attributes;
  29 import org.xml.sax.helpers.XMLFilterImpl;
  30 
  31 import com.sun.org.apache.xml.internal.resolver.Catalog;
  32 import com.sun.org.apache.xml.internal.resolver.CatalogManager;
  33 
  34 import com.sun.org.apache.xml.internal.resolver.helpers.FileURL;
  35 
  36 /**
  37  * A SAX XMLFilter that performs catalog-based entity resolution.
  38  *
  39  * <p>This class implements a SAX XMLFilter that performs entity resolution
  40  * using the CatalogResolver. The actual, underlying parser is obtained
  41  * from a SAXParserFactory.</p>
  42  * </p>
  43  *
  44  * @see CatalogResolver
  45  * @see org.xml.sax.XMLFilter
  46  *
  47  * @author Norman Walsh
  48  * <a href="mailto:Norman.Walsh@Sun.COM">Norman.Walsh@Sun.COM</a>
  49  *
  50  * @version 1.0
  51  */
  52 public class ResolvingXMLFilter extends XMLFilterImpl {
  53   /**
  54    * Suppress explanatory message?
  55    *
  56    * @see #parse(InputSource)
  57    */
  58   private static boolean suppressExplanation = false;
  59 
  60   /** The manager for the underlying resolver. */
  61   CatalogManager catalogManager = CatalogManager.getStaticManager();
  62 
  63   /** The underlying catalog resolver. */
  64   private CatalogResolver catalogResolver = null;
  65 
  66   /** A separate resolver for oasis-xml-pi catalogs. */
  67   private CatalogResolver piCatalogResolver = null;
  68 
  69   /** Are we in the prolog? Is an oasis-xml-catalog PI valid now? */
  70   private boolean allowXMLCatalogPI = false;
  71 
  72   /** The base URI of the input document, if known. */
  73   private URL baseURL = null;
  74 
  75   /** Construct an empty XML Filter with no parent. */
  76   public ResolvingXMLFilter() {
  77     super();
  78     catalogResolver = new CatalogResolver(catalogManager);
  79   }
  80 
  81   /** Construct an XML filter with the specified parent. */
  82   public ResolvingXMLFilter(XMLReader parent) {
  83     super(parent);
  84     catalogResolver = new CatalogResolver(catalogManager);
  85   }
  86 
  87   /** Construct an XML filter with the specified parent. */
  88   public ResolvingXMLFilter(CatalogManager manager) {
  89     super();
  90     catalogManager = manager;
  91     catalogResolver = new CatalogResolver(catalogManager);
  92   }
  93 
  94   /** Construct an XML filter with the specified parent. */
  95   public ResolvingXMLFilter(XMLReader parent, CatalogManager manager) {
  96     super(parent);
  97     catalogManager = manager;
  98     catalogResolver = new CatalogResolver(catalogManager);
  99   }
 100 
 101   /**
 102    * Provide accessto the underlying Catalog.
 103    */
 104   public Catalog getCatalog() {
 105     return catalogResolver.getCatalog();
 106   }
 107 
 108   /**
 109    * SAX XMLReader API.
 110    *
 111    * <p>Note that the JAXP 1.1ea2 parser crashes with an InternalError if
 112    * it encounters a system identifier that appears to be a relative URI
 113    * that begins with a slash. For example, the declaration:</p>
 114    *
 115    * <pre>
 116    * &lt;!DOCTYPE book SYSTEM "/path/to/dtd/on/my/system/docbookx.dtd">
 117    * </pre>
 118    *
 119    * <p>would cause such an error. As a convenience, this method catches
 120    * that error and prints an explanation. (Unfortunately, it's not possible
 121    * to identify the particular system identifier that causes the problem.)
 122    * </p>
 123    *
 124    * <p>The underlying error is forwarded after printing the explanatory
 125    * message. The message is only every printed once and if
 126    * <code>suppressExplanation</code> is set to <code>false</code> before
 127    * parsing, it will never be printed.</p>
 128    */
 129   public void parse(InputSource input)
 130     throws IOException, SAXException {
 131     allowXMLCatalogPI = true;
 132 
 133     setupBaseURI(input.getSystemId());
 134 
 135     try {
 136       super.parse(input);
 137     } catch (InternalError ie) {
 138       explain(input.getSystemId());
 139       throw ie;
 140     }
 141   }
 142 
 143   /** SAX XMLReader API.
 144    *
 145    * @see #parse(InputSource)
 146    */
 147   public void parse(String systemId)
 148     throws IOException, SAXException {
 149     allowXMLCatalogPI = true;
 150 
 151     setupBaseURI(systemId);
 152 
 153     try {
 154       super.parse(systemId);
 155     } catch (InternalError ie) {
 156       explain(systemId);
 157       throw ie;
 158     }
 159   }
 160 
 161   /**
 162    * Implements the <code>resolveEntity</code> method
 163    * for the SAX interface, using an underlying CatalogResolver
 164    * to do the real work.
 165    */
 166   public InputSource resolveEntity (String publicId, String systemId) {
 167     allowXMLCatalogPI = false;
 168     String resolved = catalogResolver.getResolvedEntity(publicId, systemId);
 169 
 170     if (resolved == null && piCatalogResolver != null) {
 171       resolved = piCatalogResolver.getResolvedEntity(publicId, systemId);
 172     }
 173 
 174     if (resolved != null) {
 175       try {
 176         InputSource iSource = new InputSource(resolved);
 177         iSource.setPublicId(publicId);
 178 
 179         // Ideally this method would not attempt to open the
 180         // InputStream, but there is a bug (in Xerces, at least)
 181         // that causes the parser to mistakenly open the wrong
 182         // system identifier if the returned InputSource does
 183         // not have a byteStream.
 184         //
 185         // It could be argued that we still shouldn't do this here,
 186         // but since the purpose of calling the entityResolver is
 187         // almost certainly to open the input stream, it seems to
 188         // do little harm.
 189         //
 190         URL url = new URL(resolved);
 191         InputStream iStream = url.openStream();
 192         iSource.setByteStream(iStream);
 193 
 194         return iSource;
 195       } catch (Exception e) {
 196         catalogManager.debug.message(1,
 197                                      "Failed to create InputSource ("
 198                                      + e.toString()
 199                                      + ")", resolved);
 200         return null;
 201       }
 202     } else {
 203       return null;
 204     }
 205   }
 206 
 207   /** SAX DTDHandler API.
 208    *
 209    * <p>Captured here only to detect the end of the prolog so that
 210    * we can ignore subsequent oasis-xml-catalog PIs. Otherwise
 211    * the events are just passed through.</p>
 212    */
 213   public void notationDecl (String name, String publicId, String systemId)
 214     throws SAXException {
 215     allowXMLCatalogPI = false;
 216     super.notationDecl(name,publicId,systemId);
 217   }
 218 
 219   /** SAX DTDHandler API.
 220    *
 221    * <p>Captured here only to detect the end of the prolog so that
 222    * we can ignore subsequent oasis-xml-catalog PIs. Otherwise
 223    * the events are just passed through.</p>
 224    */
 225   public void unparsedEntityDecl (String name,
 226                                   String publicId,
 227                                   String systemId,
 228                                   String notationName)
 229     throws SAXException {
 230     allowXMLCatalogPI = false;
 231     super.unparsedEntityDecl (name, publicId, systemId, notationName);
 232   }
 233 
 234   /** SAX ContentHandler API.
 235    *
 236    * <p>Captured here only to detect the end of the prolog so that
 237    * we can ignore subsequent oasis-xml-catalog PIs. Otherwise
 238    * the events are just passed through.</p>
 239    */
 240   public void startElement (String uri, String localName, String qName,
 241                             Attributes atts)
 242     throws SAXException {
 243     allowXMLCatalogPI = false;
 244     super.startElement(uri,localName,qName,atts);
 245   }
 246 
 247   /** SAX ContentHandler API.
 248    *
 249    * <p>Detect and use the oasis-xml-catalog PI if it occurs.</p>
 250    */
 251   public void processingInstruction(String target, String pidata)
 252     throws SAXException {
 253     if (target.equals("oasis-xml-catalog")) {
 254       URL catalog = null;
 255       String data = pidata;
 256 
 257       int pos = data.indexOf("catalog=");
 258       if (pos >= 0) {
 259         data = data.substring(pos+8);
 260         if (data.length() > 1) {
 261           String quote = data.substring(0,1);
 262           data = data.substring(1);
 263           pos = data.indexOf(quote);
 264           if (pos >= 0) {
 265             data = data.substring(0, pos);
 266             try {
 267               if (baseURL != null) {
 268                 catalog = new URL(baseURL, data);
 269               } else {
 270                 catalog = new URL(data);
 271               }
 272             } catch (MalformedURLException mue) {
 273               // nevermind
 274             }
 275           }
 276         }
 277       }
 278 
 279       if (allowXMLCatalogPI) {
 280         if (catalogManager.getAllowOasisXMLCatalogPI()) {
 281           catalogManager.debug.message(4,"oasis-xml-catalog PI", pidata);
 282 
 283           if (catalog != null) {
 284             try {
 285               catalogManager.debug.message(4,"oasis-xml-catalog", catalog.toString());
 286 
 287               if (piCatalogResolver == null) {
 288                 piCatalogResolver = new CatalogResolver(true);
 289               }
 290 
 291               piCatalogResolver.getCatalog().parseCatalog(catalog.toString());
 292             } catch (Exception e) {
 293               catalogManager.debug.message(3, "Exception parsing oasis-xml-catalog: "
 294                             + catalog.toString());
 295             }
 296           } else {
 297             catalogManager.debug.message(3, "PI oasis-xml-catalog unparseable: " + pidata);
 298           }
 299         } else {
 300           catalogManager.debug.message(4,"PI oasis-xml-catalog ignored: " + pidata);
 301         }
 302       } else {
 303         catalogManager.debug.message(3, "PI oasis-xml-catalog occurred in an invalid place: "
 304                       + pidata);
 305       }
 306     } else {
 307       super.processingInstruction(target, pidata);
 308     }
 309   }
 310 
 311   /** Save the base URI of the document being parsed. */
 312   private void setupBaseURI(String systemId) {
 313     URL cwd = null;
 314 
 315     try {
 316       cwd = FileURL.makeURL("basename");
 317     } catch (MalformedURLException mue) {
 318       cwd = null;
 319     }
 320 
 321     try {
 322       baseURL = new URL(systemId);
 323     } catch (MalformedURLException mue) {
 324       if (cwd != null) {
 325         try {
 326           baseURL = new URL(cwd, systemId);
 327         } catch (MalformedURLException mue2) {
 328           // give up
 329           baseURL = null;
 330         }
 331       } else {
 332         // give up
 333         baseURL = null;
 334       }
 335     }
 336   }
 337 
 338   /** Provide one possible explanation for an InternalError. */
 339   private void explain(String systemId) {
 340     if (!suppressExplanation) {
 341       System.out.println("XMLReader probably encountered bad URI in " + systemId);
 342       System.out.println("For example, replace '/some/uri' with 'file:/some/uri'.");
 343     }
 344     suppressExplanation = true;
 345   }
 346 }