1 /*
   2  * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.internal.ws.wsdl.parser;
  27 
  28 import com.sun.istack.internal.NotNull;
  29 import com.sun.tools.internal.ws.util.xml.XmlUtil;
  30 import com.sun.tools.internal.ws.wscompile.ErrorReceiver;
  31 import com.sun.tools.internal.ws.wscompile.WsimportOptions;
  32 import com.sun.tools.internal.ws.wsdl.document.schema.SchemaConstants;
  33 import com.sun.tools.internal.xjc.reader.internalizer.LocatorTable;
  34 import com.sun.xml.internal.bind.marshaller.DataWriter;
  35 import org.w3c.dom.Document;
  36 import org.w3c.dom.Element;
  37 import org.w3c.dom.NodeList;
  38 import org.xml.sax.ContentHandler;
  39 import org.xml.sax.*;
  40 import org.xml.sax.helpers.XMLFilterImpl;
  41 
  42 import javax.xml.parsers.DocumentBuilder;
  43 import javax.xml.parsers.DocumentBuilderFactory;
  44 import javax.xml.parsers.ParserConfigurationException;
  45 import javax.xml.parsers.SAXParserFactory;
  46 import javax.xml.transform.Transformer;
  47 import javax.xml.transform.TransformerException;
  48 import javax.xml.transform.TransformerFactory;
  49 import javax.xml.transform.dom.DOMSource;
  50 import javax.xml.transform.sax.SAXResult;
  51 import java.io.IOException;
  52 import java.io.InputStream;
  53 import java.io.OutputStream;
  54 import java.io.OutputStreamWriter;
  55 import java.net.*;
  56 import java.util.*;
  57 
  58 /**
  59  * @author Vivek Pandey
  60  */
  61 public class DOMForest {
    /**
     * To correctly feed documents to a schema parser, we need to remember
     * which documents (of the forest) were given as the root
     * documents, and which of them are read as included/imported
     * documents.
     * <p/>
     * Set of system ids as strings. An entry is added whenever a document
     * is parsed with {@code root == true}.
     */
    protected final Set<String> rootDocuments = new HashSet<String>();

    /**
     * Contains wsdl:import(s): the system ids recorded through
     * {@link #addExternalReferences(String)}.
     */
    protected final Set<String> externalReferences = new HashSet<String>();

    /**
     * Actual data storage: a map from system id to the {@link Document}
     * parsed from it. Keys are stored in normalized form.
     */
    protected final Map<String, Document> core = new HashMap<String, Document>();

    /**
     * Receives errors and debug messages raised while building the forest.
     * Also installed as the SAX error handler of the readers created here.
     */
    protected final ErrorReceiver errorReceiver;

    /** Used only to create the empty documents that the SAX pipeline fills in. */
    private final DocumentBuilder documentBuilder;

    /** Factory for the SAX parsers that read each document. */
    private final SAXParserFactory parserFactory;

    /**
     * Inlined schema elements inside the wsdl:type section, collected from
     * every document parsed into this forest.
     */
    protected final List<Element> inlinedSchemaElements = new ArrayList<Element>();


    /**
     * Stores location information for all the trees in this forest.
     */
    public final LocatorTable locatorTable = new LocatorTable();

    /**
     * Resolves system ids to input sources; also installed on every reader
     * and filter created by this forest.
     */
    protected final EntityResolver entityResolver;

    /**
     * Stores all the outer-most {@code <jaxb:bindings>} customizations.
     */
    public final Set<Element> outerMostBindings = new HashSet<Element>();

    /**
     * Schema language dependent part of the processing.
     */
    protected final InternalizationLogic logic;

    /** Tool options; the constructor and {@code dump} treat this as possibly {@code null}. */
    protected final WsimportOptions options;
 109 
 110     public DOMForest(InternalizationLogic logic, @NotNull EntityResolver entityResolver, WsimportOptions options, ErrorReceiver errReceiver) {
 111         this.options = options;
 112         this.entityResolver = entityResolver;
 113         this.errorReceiver = errReceiver;
 114         this.logic = logic;
 115         try {
 116             // secure xml processing can be switched off if input requires it
 117             boolean secureProcessingEnabled = options == null || !options.disableXmlSecurity;
 118             DocumentBuilderFactory dbf = XmlUtil.newDocumentBuilderFactory(!secureProcessingEnabled);
 119             dbf.setNamespaceAware(true);
 120             this.documentBuilder = dbf.newDocumentBuilder();
 121 
 122             this.parserFactory = XmlUtil.newSAXParserFactory(secureProcessingEnabled);
 123             this.parserFactory.setNamespaceAware(true);
 124 
 125             if(secureProcessingEnabled){
 126                 dbf.setExpandEntityReferences(false);
 127                 try {
 128                 parserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
 129                 parserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
 130                 parserFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
 131               } catch (SAXNotRecognizedException e){
 132                 throw new ParserConfigurationException(e.getMessage());
 133               } catch (SAXNotSupportedException e) {
 134                 throw new ParserConfigurationException(e.getMessage());
 135               }
 136             }
 137 
 138         } catch (ParserConfigurationException e) {
 139             throw new AssertionError(e);
 140         }
 141     }
 142 
 143     public List<Element> getInlinedSchemaElement() {
 144         return inlinedSchemaElements;
 145     }
 146 
 147     public @NotNull Document parse(InputSource source, boolean root) throws SAXException, IOException {
 148         if (source.getSystemId() == null)
 149             throw new IllegalArgumentException();
 150         return parse(source.getSystemId(), source, root);
 151     }
 152 
 153     /**
 154      * Parses an XML at the given location (
 155      * and XMLs referenced by it) into DOM trees
 156      * and stores them to this forest.
 157      *
 158      * @return the parsed DOM document object.
 159      */
 160     public Document parse(String systemId, boolean root) throws SAXException, IOException{
 161 
 162         systemId = normalizeSystemId(systemId);
 163 
 164         InputSource is = null;
 165 
 166         // allow entity resolver to find the actual byte stream.
 167         is = entityResolver.resolveEntity(null, systemId);
 168         if (is == null)
 169             is = new InputSource(systemId);
 170         else {
 171             resolvedCache.put(systemId, is.getSystemId());
 172             systemId=is.getSystemId();
 173         }
 174 
 175         if (core.containsKey(systemId)) {
 176             // this document has already been parsed. Just ignore.
 177             return core.get(systemId);
 178         }
 179 
 180         if(!root)
 181             addExternalReferences(systemId);
 182 
 183         // but we still use the original system Id as the key.
 184         return parse(systemId, is, root);
 185     }
 186     protected Map<String,String> resolvedCache = new HashMap<String,String>();
 187 
 188     public Map<String,String> getReferencedEntityMap() {
 189         return resolvedCache;
 190     }
 191     /**
 192      * Parses the given document and add it to the DOM forest.
 193      *
 194      * @return null if there was a parse error. otherwise non-null.
 195      */
 196     private @NotNull Document parse(String systemId, InputSource inputSource, boolean root) throws SAXException, IOException{
 197         Document dom = documentBuilder.newDocument();
 198 
 199         systemId = normalizeSystemId(systemId);
 200 
 201         // put into the map before growing a tree, to
 202         // prevent recursive reference from causing infinite loop.
 203         core.put(systemId, dom);
 204 
 205         dom.setDocumentURI(systemId);
 206         if (root)
 207             rootDocuments.add(systemId);
 208 
 209         try {
 210             XMLReader reader = createReader(dom);
 211 
 212             InputStream is = null;
 213             if(inputSource.getByteStream() == null){
 214                 inputSource = entityResolver.resolveEntity(null, systemId);
 215             }
 216             reader.parse(inputSource);
 217             Element doc = dom.getDocumentElement();
 218             if (doc == null) {
 219                 return null;
 220             }
 221             NodeList schemas = doc.getElementsByTagNameNS(SchemaConstants.NS_XSD, "schema");
 222             for (int i = 0; i < schemas.getLength(); i++) {
 223                 inlinedSchemaElements.add((Element) schemas.item(i));
 224             }
 225         } catch (ParserConfigurationException e) {
 226             errorReceiver.error(e);
 227             throw new SAXException(e.getMessage());
 228         }
 229         resolvedCache.put(systemId, dom.getDocumentURI());
 230         return dom;
 231     }
 232 
 233     public void addExternalReferences(String ref) {
 234         if (!externalReferences.contains(ref))
 235             externalReferences.add(ref);
 236     }
 237 
 238 
 239     public Set<String> getExternalReferences() {
 240         return externalReferences;
 241     }
 242 
 243 
 244 
 245     public interface Handler extends ContentHandler {
 246         /**
 247          * Gets the DOM that was built.
 248          */
 249         public Document getDocument();
 250     }
 251 
 252     /**
 253          * Returns a {@link org.xml.sax.XMLReader} to parse a document into this DOM forest.
 254          * <p/>
 255          * This version requires that the DOM object to be created and registered
 256          * to the map beforehand.
 257          */
 258     private XMLReader createReader(Document dom) throws SAXException, ParserConfigurationException {
 259         XMLReader reader = parserFactory.newSAXParser().getXMLReader();
 260         DOMBuilder dombuilder = new DOMBuilder(dom, locatorTable, outerMostBindings);
 261         try {
 262             reader.setProperty("http://xml.org/sax/properties/lexical-handler", dombuilder);
 263         } catch(SAXException e) {
 264             errorReceiver.debug(e.getMessage());
 265         }
 266 
 267         ContentHandler handler = new WhitespaceStripper(dombuilder, errorReceiver, entityResolver);
 268         handler = new VersionChecker(handler, errorReceiver, entityResolver);
 269 
 270         // insert the reference finder so that
 271         // included/imported schemas will be also parsed
 272         XMLFilterImpl f = logic.createExternalReferenceFinder(this);
 273         f.setContentHandler(handler);
 274         if (errorReceiver != null)
 275             f.setErrorHandler(errorReceiver);
 276         f.setEntityResolver(entityResolver);
 277 
 278         reader.setContentHandler(f);
 279         if (errorReceiver != null)
 280             reader.setErrorHandler(errorReceiver);
 281         reader.setEntityResolver(entityResolver);
 282         return reader;
 283     }
 284 
 285     private String normalizeSystemId(String systemId) {
 286         try {
 287             systemId = new URI(systemId).normalize().toString();
 288         } catch (URISyntaxException e) {
 289             // leave the system ID untouched. In my experience URI is often too strict
 290         }
 291         return systemId;
 292     }
 293 
 294     boolean isExtensionMode() {
 295         return options.isExtensionMode();
 296     }
 297 
 298 
 299     /**
 300      * Gets the DOM tree associated with the specified system ID,
 301      * or null if none is found.
 302      */
 303     public Document get(String systemId) {
 304         Document doc = core.get(systemId);
 305 
 306         if (doc == null && systemId.startsWith("file:/") && !systemId.startsWith("file://")) {
 307             // As of JDK1.4, java.net.URL.toExternal method returns URLs like
 308             // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
 309             // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
 310             // and this descripancy breaks DOM look up by system ID.
 311 
 312             // this extra check solves this problem.
 313             doc = core.get("file://" + systemId.substring(5));
 314         }
 315 
 316         if (doc == null && systemId.startsWith("file:")) {
 317             // on Windows, filenames are case insensitive.
 318             // perform case-insensitive search for improved user experience
 319             String systemPath = getPath(systemId);
 320             for (String key : core.keySet()) {
 321                 if (key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) {
 322                     doc = core.get(key);
 323                     break;
 324                 }
 325             }
 326         }
 327 
 328         return doc;
 329     }
 330 
 331     /**
 332      * Strips off the leading 'file:///' portion from an URL.
 333      */
 334     private String getPath(String key) {
 335         key = key.substring(5); // skip 'file:'
 336         while (key.length() > 0 && key.charAt(0) == '/')
 337             key = key.substring(1);
 338         return key;
 339     }
 340 
 341     /**
 342      * Gets all the system IDs of the documents.
 343      */
 344     public String[] listSystemIDs() {
 345         return core.keySet().toArray(new String[core.keySet().size()]);
 346     }
 347 
 348     /**
 349      * Gets the system ID from which the given DOM is parsed.
 350      * <p/>
 351      * Poor-man's base URI.
 352      */
 353     public String getSystemId(Document dom) {
 354         for (Map.Entry<String, Document> e : core.entrySet()) {
 355             if (e.getValue() == dom)
 356                 return e.getKey();
 357         }
 358         return null;
 359     }
 360 
 361     /**
 362      * Gets the first one (which is more or less random) in {@link #rootDocuments}.
 363      */
 364     public String getFirstRootDocument() {
 365         if(rootDocuments.isEmpty()) return null;
 366         return rootDocuments.iterator().next();
 367     }
 368 
 369     public Set<String> getRootDocuments() {
 370         return rootDocuments;
 371     }
 372 
 373     /**
 374      * Dumps the contents of the forest to the specified stream.
 375      * <p/>
 376      * This is a debug method. As such, error handling is sloppy.
 377      */
 378     public void dump(OutputStream out) throws IOException {
 379         try {
 380             // create identity transformer
 381             // secure xml processing can be switched off if input requires it
 382             boolean secureProcessingEnabled = options == null || !options.disableXmlSecurity;
 383             TransformerFactory tf = XmlUtil.newTransformerFactory(secureProcessingEnabled);
 384             Transformer it = tf.newTransformer();
 385 
 386             for (Map.Entry<String, Document> e : core.entrySet()) {
 387                 out.write(("---<< " + e.getKey() + '\n').getBytes());
 388 
 389                 DataWriter dw = new DataWriter(new OutputStreamWriter(out), null);
 390                 dw.setIndentStep("  ");
 391                 it.transform(new DOMSource(e.getValue()),
 392                         new SAXResult(dw));
 393 
 394                 out.write("\n\n\n".getBytes());
 395             }
 396         } catch (TransformerException e) {
 397             e.printStackTrace();
 398         }
 399     }
 400 
 401 }