1 /*
   2  * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.internal.ws.wsdl.parser;
  27 
  28 import com.sun.istack.internal.NotNull;
  29 import com.sun.tools.internal.ws.util.xml.XmlUtil;
  30 import com.sun.tools.internal.ws.wscompile.ErrorReceiver;
  31 import com.sun.tools.internal.ws.wscompile.WsimportOptions;
  32 import com.sun.tools.internal.ws.wsdl.document.schema.SchemaConstants;
  33 import com.sun.tools.internal.xjc.reader.internalizer.LocatorTable;
  34 import com.sun.xml.internal.bind.marshaller.DataWriter;
  35 import org.w3c.dom.Document;
  36 import org.w3c.dom.Element;
  37 import org.w3c.dom.NodeList;
  38 import org.xml.sax.ContentHandler;
  39 import org.xml.sax.*;
  40 import org.xml.sax.helpers.XMLFilterImpl;
  41 
  42 import javax.xml.parsers.DocumentBuilder;
  43 import javax.xml.parsers.DocumentBuilderFactory;
  44 import javax.xml.parsers.ParserConfigurationException;
  45 import javax.xml.parsers.SAXParserFactory;
  46 import javax.xml.transform.Transformer;
  47 import javax.xml.transform.TransformerException;
  48 import javax.xml.transform.TransformerFactory;
  49 import javax.xml.transform.dom.DOMSource;
  50 import javax.xml.transform.sax.SAXResult;
  51 import java.io.IOException;
  52 import java.io.InputStream;
  53 import java.io.OutputStream;
  54 import java.io.OutputStreamWriter;
  55 import java.net.*;
  56 import java.util.*;
  57 
  58 /**
  59  * @author Vivek Pandey
  60  */
  61 public class DOMForest {
  62     /**
  63      * To correctly feed documents to a schema parser, we need to remember
  64      * which documents (of the forest) were given as the root
  65      * documents, and which of them are read as included/imported
  66      * documents.
  67      * <p/>
  68      * <p/>
  69      * Set of system ids as strings.
  70      */
  71     protected final Set<String> rootDocuments = new HashSet<String>();
  72 
  73     /**
  74      * Contains wsdl:import(s)
  75      */
  76     protected final Set<String> externalReferences = new HashSet<String>();
  77 
  78     /**
  79      * actual data storage map&lt;SystemId,Document>.
  80      */
  81     protected final Map<String, Document> core = new HashMap<String, Document>();
  82     protected final ErrorReceiver errorReceiver;
  83 
  84     private final DocumentBuilder documentBuilder;
  85     private final SAXParserFactory parserFactory;
  86 
  87     /**
  88      * inlined schema elements inside wsdl:type section
  89      */
  90     protected final List<Element> inlinedSchemaElements = new ArrayList<Element>();
  91 
  92 
  93     /**
  94      * Stores location information for all the trees in this forest.
  95      */
  96     public final LocatorTable locatorTable = new LocatorTable();
  97 
  98     protected final EntityResolver entityResolver;
  99     /**
 100      * Stores all the outer-most &lt;jaxb:bindings> customizations.
 101      */
 102     public final Set<Element> outerMostBindings = new HashSet<Element>();
 103 
 104     /**
 105      * Schema language dependent part of the processing.
 106      */
 107     protected final InternalizationLogic logic;
 108     protected final WsimportOptions options;
 109 
 110     public DOMForest(InternalizationLogic logic, @NotNull EntityResolver entityResolver, WsimportOptions options, ErrorReceiver errReceiver) {
 111         this.options = options;
 112         this.entityResolver = entityResolver;
 113         this.errorReceiver = errReceiver;
 114         this.logic = logic;
 115         try {
 116             // secure xml processing can be switched off if input requires it
 117             boolean secureProcessingEnabled = options == null || !options.disableXmlSecurity;
 118             DocumentBuilderFactory dbf = XmlUtil.newDocumentBuilderFactory(secureProcessingEnabled);
 119             dbf.setNamespaceAware(true);
 120             this.documentBuilder = dbf.newDocumentBuilder();
 121 
 122             this.parserFactory = XmlUtil.newSAXParserFactory(secureProcessingEnabled);
 123             this.parserFactory.setNamespaceAware(true);
 124         } catch (ParserConfigurationException e) {
 125             throw new AssertionError(e);
 126         }
 127     }
 128 
 129     public List<Element> getInlinedSchemaElement() {
 130         return inlinedSchemaElements;
 131     }
 132 
 133     public @NotNull Document parse(InputSource source, boolean root) throws SAXException, IOException {
 134         if (source.getSystemId() == null)
 135             throw new IllegalArgumentException();
 136         return parse(source.getSystemId(), source, root);
 137     }
 138 
 139     /**
 140      * Parses an XML at the given location (
 141      * and XMLs referenced by it) into DOM trees
 142      * and stores them to this forest.
 143      *
 144      * @return the parsed DOM document object.
 145      */
 146     public Document parse(String systemId, boolean root) throws SAXException, IOException{
 147 
 148         systemId = normalizeSystemId(systemId);
 149 
 150         InputSource is = null;
 151 
 152         // allow entity resolver to find the actual byte stream.
 153         is = entityResolver.resolveEntity(null, systemId);
 154         if (is == null)
 155             is = new InputSource(systemId);
 156         else {
 157             resolvedCache.put(systemId, is.getSystemId());
 158             systemId=is.getSystemId();
 159         }
 160 
 161         if (core.containsKey(systemId)) {
 162             // this document has already been parsed. Just ignore.
 163             return core.get(systemId);
 164         }
 165 
 166         if(!root)
 167             addExternalReferences(systemId);
 168 
 169         // but we still use the original system Id as the key.
 170         return parse(systemId, is, root);
 171     }
 172     protected Map<String,String> resolvedCache = new HashMap<String,String>();
 173 
 174     public Map<String,String> getReferencedEntityMap() {
 175         return resolvedCache;
 176     }
 177     /**
 178      * Parses the given document and add it to the DOM forest.
 179      *
 180      * @return null if there was a parse error. otherwise non-null.
 181      */
 182     private @NotNull Document parse(String systemId, InputSource inputSource, boolean root) throws SAXException, IOException{
 183         Document dom = documentBuilder.newDocument();
 184 
 185         systemId = normalizeSystemId(systemId);
 186 
 187         // put into the map before growing a tree, to
 188         // prevent recursive reference from causing infinite loop.
 189         core.put(systemId, dom);
 190 
 191         dom.setDocumentURI(systemId);
 192         if (root)
 193             rootDocuments.add(systemId);
 194 
 195         try {
 196             XMLReader reader = createReader(dom);
 197 
 198             InputStream is = null;
 199             if(inputSource.getByteStream() == null){
 200                 inputSource = entityResolver.resolveEntity(null, systemId);
 201             }
 202             reader.parse(inputSource);
 203             Element doc = dom.getDocumentElement();
 204             if (doc == null) {
 205                 return null;
 206             }
 207             NodeList schemas = doc.getElementsByTagNameNS(SchemaConstants.NS_XSD, "schema");
 208             for (int i = 0; i < schemas.getLength(); i++) {
 209                 inlinedSchemaElements.add((Element) schemas.item(i));
 210             }
 211         } catch (ParserConfigurationException e) {
 212             errorReceiver.error(e);
 213             throw new SAXException(e.getMessage());
 214         }
 215         resolvedCache.put(systemId, dom.getDocumentURI());
 216         return dom;
 217     }
 218 
 219     public void addExternalReferences(String ref) {
 220         if (!externalReferences.contains(ref))
 221             externalReferences.add(ref);
 222     }
 223 
 224 
 225     public Set<String> getExternalReferences() {
 226         return externalReferences;
 227     }
 228 
 229 
 230 
 231     public interface Handler extends ContentHandler {
 232         /**
 233          * Gets the DOM that was built.
 234          */
 235         public Document getDocument();
 236     }
 237 
 238     /**
 239          * Returns a {@link org.xml.sax.XMLReader} to parse a document into this DOM forest.
 240          * <p/>
 241          * This version requires that the DOM object to be created and registered
 242          * to the map beforehand.
 243          */
 244     private XMLReader createReader(Document dom) throws SAXException, ParserConfigurationException {
 245         XMLReader reader = parserFactory.newSAXParser().getXMLReader();
 246         DOMBuilder dombuilder = new DOMBuilder(dom, locatorTable, outerMostBindings);
 247         try {
 248             reader.setProperty("http://xml.org/sax/properties/lexical-handler", dombuilder);
 249         } catch(SAXException e) {
 250             errorReceiver.debug(e.getMessage());
 251         }
 252 
 253         ContentHandler handler = new WhitespaceStripper(dombuilder, errorReceiver, entityResolver);
 254         handler = new VersionChecker(handler, errorReceiver, entityResolver);
 255 
 256         // insert the reference finder so that
 257         // included/imported schemas will be also parsed
 258         XMLFilterImpl f = logic.createExternalReferenceFinder(this);
 259         f.setContentHandler(handler);
 260         if (errorReceiver != null)
 261             f.setErrorHandler(errorReceiver);
 262         f.setEntityResolver(entityResolver);
 263 
 264         reader.setContentHandler(f);
 265         if (errorReceiver != null)
 266             reader.setErrorHandler(errorReceiver);
 267         reader.setEntityResolver(entityResolver);
 268         return reader;
 269     }
 270 
 271     private String normalizeSystemId(String systemId) {
 272         try {
 273             systemId = new URI(systemId).normalize().toString();
 274         } catch (URISyntaxException e) {
 275             // leave the system ID untouched. In my experience URI is often too strict
 276         }
 277         return systemId;
 278     }
 279 
 280     boolean isExtensionMode() {
 281         return options.isExtensionMode();
 282     }
 283 
 284 
 285     /**
 286      * Gets the DOM tree associated with the specified system ID,
 287      * or null if none is found.
 288      */
 289     public Document get(String systemId) {
 290         Document doc = core.get(systemId);
 291 
 292         if (doc == null && systemId.startsWith("file:/") && !systemId.startsWith("file://")) {
 293             // As of JDK1.4, java.net.URL.toExternal method returns URLs like
 294             // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
 295             // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
 296             // and this descripancy breaks DOM look up by system ID.
 297 
 298             // this extra check solves this problem.
 299             doc = core.get("file://" + systemId.substring(5));
 300         }
 301 
 302         if (doc == null && systemId.startsWith("file:")) {
 303             // on Windows, filenames are case insensitive.
 304             // perform case-insensitive search for improved user experience
 305             String systemPath = getPath(systemId);
 306             for (String key : core.keySet()) {
 307                 if (key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) {
 308                     doc = core.get(key);
 309                     break;
 310                 }
 311             }
 312         }
 313 
 314         return doc;
 315     }
 316 
 317     /**
 318      * Strips off the leading 'file:///' portion from an URL.
 319      */
 320     private String getPath(String key) {
 321         key = key.substring(5); // skip 'file:'
 322         while (key.length() > 0 && key.charAt(0) == '/')
 323             key = key.substring(1);
 324         return key;
 325     }
 326 
 327     /**
 328      * Gets all the system IDs of the documents.
 329      */
 330     public String[] listSystemIDs() {
 331         return core.keySet().toArray(new String[core.keySet().size()]);
 332     }
 333 
 334     /**
 335      * Gets the system ID from which the given DOM is parsed.
 336      * <p/>
 337      * Poor-man's base URI.
 338      */
 339     public String getSystemId(Document dom) {
 340         for (Map.Entry<String, Document> e : core.entrySet()) {
 341             if (e.getValue() == dom)
 342                 return e.getKey();
 343         }
 344         return null;
 345     }
 346 
 347     /**
 348      * Gets the first one (which is more or less random) in {@link #rootDocuments}.
 349      */
 350     public String getFirstRootDocument() {
 351         if(rootDocuments.isEmpty()) return null;
 352         return rootDocuments.iterator().next();
 353     }
 354 
 355     public Set<String> getRootDocuments() {
 356         return rootDocuments;
 357     }
 358 
 359     /**
 360      * Dumps the contents of the forest to the specified stream.
 361      * <p/>
 362      * This is a debug method. As such, error handling is sloppy.
 363      */
 364     public void dump(OutputStream out) throws IOException {
 365         try {
 366             // create identity transformer
 367             // secure xml processing can be switched off if input requires it
 368             boolean secureProcessingEnabled = options == null || !options.disableXmlSecurity;
 369             TransformerFactory tf = XmlUtil.newTransformerFactory(secureProcessingEnabled);
 370             Transformer it = tf.newTransformer();
 371 
 372             for (Map.Entry<String, Document> e : core.entrySet()) {
 373                 out.write(("---<< " + e.getKey() + '\n').getBytes());
 374 
 375                 DataWriter dw = new DataWriter(new OutputStreamWriter(out), null);
 376                 dw.setIndentStep("  ");
 377                 it.transform(new DOMSource(e.getValue()),
 378                         new SAXResult(dw));
 379 
 380                 out.write("\n\n\n".getBytes());
 381             }
 382         } catch (TransformerException e) {
 383             e.printStackTrace();
 384         }
 385     }
 386 
 387 }