1 /*
   2  * reserved comment block
   3  * DO NOT REMOVE OR ALTER!
   4  */
   5 // DOMCatalogReader.java - Read XML Catalog files
   6 
   7 /*
   8  * Copyright 2001-2004 The Apache Software Foundation or its licensors,
   9  * as applicable.
  10  *
  11  * Licensed under the Apache License, Version 2.0 (the "License");
  12  * you may not use this file except in compliance with the License.
  13  * You may obtain a copy of the License at
  14  *
  15  *      http://www.apache.org/licenses/LICENSE-2.0
  16  *
  17  * Unless required by applicable law or agreed to in writing, software
  18  * distributed under the License is distributed on an "AS IS" BASIS,
  19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  20  * See the License for the specific language governing permissions and
  21  * limitations under the License.
  22  */
  23 
  24 package com.sun.org.apache.xml.internal.resolver.readers;
  25 
  26 import com.sun.org.apache.xml.internal.resolver.Catalog;
  27 import com.sun.org.apache.xml.internal.resolver.CatalogException;
  28 import com.sun.org.apache.xml.internal.resolver.helpers.Namespaces;
  29 import java.io.IOException;
  30 import java.io.InputStream;
  31 import java.net.MalformedURLException;
  32 import java.net.URL;
  33 import java.net.URLConnection;
  34 import java.util.Hashtable;
  35 import javax.xml.parsers.DocumentBuilder;
  36 import javax.xml.parsers.DocumentBuilderFactory;
  37 import javax.xml.parsers.ParserConfigurationException;
  38 import org.w3c.dom.*;
  39 import org.xml.sax.SAXException;
  40 import sun.reflect.misc.ReflectUtil;
  41 
  42 /**
  43  * A DOM-based CatalogReader.
  44  *
  45  * <p>This class is used to read XML Catalogs using the DOM. This reader
  46  * has an advantage over the SAX-based reader that it can analyze the
  47  * DOM tree rather than simply a series of SAX events. It has the disadvantage
  48  * that it requires all of the code necessary to build and walk a DOM
  49  * tree.</p>
  50  *
  51  * <p>Since the choice of CatalogReaders (in the InputStream case) can only
  52  * be made on the basis of MIME type, the following problem occurs: only
  53  * one CatalogReader can exist for all XML mime types. In order to get
  54  * around this problem, the DOMCatalogReader relies on a set of external
  55  * CatalogParsers to actually build the catalog.</p>
  56  *
  57  * <p>The selection of CatalogParsers is made on the basis of the QName
  58  * of the root element of the document.</p>
  59  *
  60  * <p>This class requires the <a href="http://java.sun.com/aboutJava/communityprocess/final/jsr005/index.html">Java API for XML Parsing</a>.</p>
  61  *
  62  * @see Catalog
  63  * @see CatalogReader
  64  * @see SAXCatalogReader
  65  * @see TextCatalogReader
  66  * @see DOMCatalogParser
  67  *
  68  * @author Norman Walsh
  69  * <a href="mailto:Norman.Walsh@Sun.COM">Norman.Walsh@Sun.COM</a>
  70  *
  71  */
  72 public class DOMCatalogReader implements CatalogReader {
  73   /**
  74    * Mapping table from QNames to CatalogParser classes.
  75    *
  76    * <p>Each key in this hash table has the form "elementname"
  77    * or "{namespaceuri}elementname". The former is used if the
  78    * namespace URI is null.</p>
  79    */
  80   protected Hashtable namespaceMap = new Hashtable();
  81 
  82   /**
  83    * Add a new parser to the reader.
  84    *
  85    * <p>This method associates the specified parserClass with the
  86    * namespaceURI/rootElement names specified.</p>
  87    *
  88    * @param namespaceURI The namespace URI. <em>Not</em> the prefix.
  89    * @param rootElement The name of the root element.
  90    * @param parserClass The name of the parserClass to instantiate
  91    * for this kind of catalog.
  92    */
  93   public void setCatalogParser(String namespaceURI,
  94                                String rootElement,
  95                                String parserClass) {
  96     if (namespaceURI == null) {
  97       namespaceMap.put(rootElement, parserClass);
  98     } else {
  99       namespaceMap.put("{"+namespaceURI+"}"+rootElement, parserClass);
 100     }
 101   }
 102 
 103   /**
 104    * Get the name of the parser class for a given catalog type.
 105    *
 106    * <p>This method returns the parserClass associated with the
 107    * namespaceURI/rootElement names specified.</p>
 108    *
 109    * @param namespaceURI The namespace URI. <em>Not</em> the prefix.
 110    * @param rootElement The name of the root element.
 111    * @return The parser class.
 112    */
 113   public String getCatalogParser(String namespaceURI,
 114                                  String rootElement) {
 115     if (namespaceURI == null) {
 116       return (String) namespaceMap.get(rootElement);
 117     } else {
 118       return (String) namespaceMap.get("{"+namespaceURI+"}"+rootElement);
 119     }
 120   }
 121 
 122   /**
 123    * Null constructor; something for subclasses to call.
 124    */
 125   public DOMCatalogReader() { }
 126 
 127   /**
 128    * Read a catalog from an input stream.
 129    *
 130    * <p>This class reads a catalog from an input stream:</p>
 131    *
 132    * <ul>
 133    * <li>Based on the QName of the root element, it determines which
 134    * parser to instantiate for this catalog.</li>
 135    * <li>It constructs a DOM Document from the catalog and</li>
 136    * <li>For each child of the root node, it calls the parser's
 137    * parseCatalogEntry method. This method is expected to make
 138    * appropriate calls back into the catalog to add entries for the
 139    * entries in the catalog. It is free to do this in whatever manner
 140    * is appropriate (perhaps using just the node passed in, perhaps
 141    * wandering arbitrarily throughout the tree).</li>
 142    * </ul>
 143    *
 144    * @param catalog The catalog for which this reader is called.
 145    * @param is The input stream that is to be read.
 146    * @throws IOException if the URL cannot be read.
 147    * @throws UnknownCatalogFormatException if the catalog format is
 148    * not recognized.
 149    * @throws UnparseableCatalogException if the catalog cannot be parsed.
 150    * (For example, if it is supposed to be XML and isn't well-formed or
 151    * if the parser class cannot be instantiated.)
 152    */
 153   public void readCatalog(Catalog catalog, InputStream is)
 154     throws IOException, CatalogException {
 155 
 156     DocumentBuilderFactory factory = null;
 157     DocumentBuilder builder = null;
 158 
 159     factory = DocumentBuilderFactory.newInstance();
 160     factory.setNamespaceAware(false);
 161     factory.setValidating(false);
 162     try {
 163       builder = factory.newDocumentBuilder();
 164     } catch (ParserConfigurationException pce) {
 165       throw new CatalogException(CatalogException.UNPARSEABLE);
 166     }
 167 
 168     Document doc = null;
 169 
 170     try {
 171       doc = builder.parse(is);
 172     } catch (SAXException se) {
 173       throw new CatalogException(CatalogException.UNKNOWN_FORMAT);
 174     }
 175 
 176     Element root = doc.getDocumentElement();
 177 
 178     String namespaceURI = Namespaces.getNamespaceURI(root);
 179     String localName    = Namespaces.getLocalName(root);
 180 
 181     String domParserClass = getCatalogParser(namespaceURI,
 182                                              localName);
 183 
 184     if (domParserClass == null) {
 185       if (namespaceURI == null) {
 186         catalog.getCatalogManager().debug.message(1, "No Catalog parser for "
 187                                                   + localName);
 188       } else {
 189         catalog.getCatalogManager().debug.message(1, "No Catalog parser for "
 190                                                   + "{" + namespaceURI + "}"
 191                                                   + localName);
 192       }
 193       return;
 194     }
 195 
 196     DOMCatalogParser domParser = null;
 197 
 198     try {
 199       domParser = (DOMCatalogParser) ReflectUtil.forName(domParserClass).newInstance();
 200     } catch (ClassNotFoundException cnfe) {
 201       catalog.getCatalogManager().debug.message(1, "Cannot load XML Catalog Parser class", domParserClass);
 202       throw new CatalogException(CatalogException.UNPARSEABLE);
 203     } catch (InstantiationException ie) {
 204       catalog.getCatalogManager().debug.message(1, "Cannot instantiate XML Catalog Parser class", domParserClass);
 205       throw new CatalogException(CatalogException.UNPARSEABLE);
 206     } catch (IllegalAccessException iae) {
 207       catalog.getCatalogManager().debug.message(1, "Cannot access XML Catalog Parser class", domParserClass);
 208       throw new CatalogException(CatalogException.UNPARSEABLE);
 209     } catch (ClassCastException cce ) {
 210       catalog.getCatalogManager().debug.message(1, "Cannot cast XML Catalog Parser class", domParserClass);
 211       throw new CatalogException(CatalogException.UNPARSEABLE);
 212     }
 213 
 214     Node node = root.getFirstChild();
 215     while (node != null) {
 216       domParser.parseCatalogEntry(catalog, node);
 217       node = node.getNextSibling();
 218     }
 219   }
 220 
 221   /**
 222    * Read the catalog behind the specified URL.
 223    *
 224    * @see #readCatalog(Catalog, InputStream)
 225    *
 226    * @param catalog The catalog for which we are reading.
 227    * @param fileUrl The URL of the document that should be read.
 228    *
 229    * @throws MalformedURLException if the specified URL cannot be
 230    * turned into a URL object.
 231    * @throws IOException if the URL cannot be read.
 232    * @throws UnknownCatalogFormatException if the catalog format is
 233    * not recognized.
 234    * @throws UnparseableCatalogException if the catalog cannot be parsed.
 235    * (For example, if it is supposed to be XML and isn't well-formed.)
 236    */
 237   public void readCatalog(Catalog catalog, String fileUrl)
 238     throws MalformedURLException, IOException, CatalogException {
 239     URL url = new URL(fileUrl);
 240     URLConnection urlCon = url.openConnection();
 241     readCatalog(catalog, urlCon.getInputStream());
 242   }
 243 }