1 /*
   2  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.internal.ws.wsdl.parser;
  27 
  28 import com.sun.istack.internal.NotNull;
  29 import com.sun.tools.internal.ws.resources.WscompileMessages;
  30 import com.sun.tools.internal.ws.wscompile.AbortException;
  31 import com.sun.tools.internal.ws.wscompile.DefaultAuthenticator;
  32 import com.sun.tools.internal.ws.wscompile.ErrorReceiver;
  33 import com.sun.tools.internal.ws.wscompile.WsimportOptions;
  34 import com.sun.tools.internal.ws.wsdl.document.schema.SchemaConstants;
  35 import com.sun.tools.internal.xjc.reader.internalizer.LocatorTable;
  36 import com.sun.xml.internal.bind.marshaller.DataWriter;
  37 import com.sun.xml.internal.ws.util.JAXWSUtils;
  38 import org.w3c.dom.Document;
  39 import org.w3c.dom.Element;
  40 import org.w3c.dom.NodeList;
  41 import org.xml.sax.ContentHandler;
  42 import org.xml.sax.*;
  43 import org.xml.sax.helpers.XMLFilterImpl;
  44 
  45 import javax.xml.parsers.DocumentBuilder;
  46 import javax.xml.parsers.DocumentBuilderFactory;
  47 import javax.xml.parsers.ParserConfigurationException;
  48 import javax.xml.parsers.SAXParserFactory;
  49 import javax.xml.transform.Transformer;
  50 import javax.xml.transform.TransformerException;
  51 import javax.xml.transform.TransformerFactory;
  52 import javax.xml.transform.dom.DOMSource;
  53 import javax.xml.transform.sax.SAXResult;
  54 import javax.net.ssl.HttpsURLConnection;
  55 import javax.net.ssl.HostnameVerifier;
  56 import javax.net.ssl.SSLSession;
  57 import java.io.IOException;
  58 import java.io.InputStream;
  59 import java.io.OutputStream;
  60 import java.io.OutputStreamWriter;
  61 import java.net.*;
  62 import java.util.*;
  63 
  64 /**
  65  * @author Vivek Pandey
  66  */
  67 public class DOMForest {
  68     /**
  69      * To correctly feed documents to a schema parser, we need to remember
  70      * which documents (of the forest) were given as the root
  71      * documents, and which of them are read as included/imported
  72      * documents.
  73      * <p/>
  74      * <p/>
  75      * Set of system ids as strings.
  76      */
  77     protected final Set<String> rootDocuments = new HashSet<String>();
  78 
  79     /**
  80      * Contains wsdl:import(s)
  81      */
  82     protected final Set<String> externalReferences = new HashSet<String>();
  83 
  84     /**
  85      * actual data storage map&lt;SystemId,Document>.
  86      */
  87     protected final Map<String, Document> core = new HashMap<String, Document>();
  88     protected final ErrorReceiver errorReceiver;
  89 
  90     private final DocumentBuilder documentBuilder;
  91     private final SAXParserFactory parserFactory;
  92 
  93     /**
  94      * inlined schema elements inside wsdl:type section
  95      */
  96     protected final List<Element> inlinedSchemaElements = new ArrayList<Element>();
  97 
  98 
  99     /**
 100      * Stores location information for all the trees in this forest.
 101      */
 102     public final LocatorTable locatorTable = new LocatorTable();
 103 
 104     protected final EntityResolver entityResolver;
 105     /**
 106      * Stores all the outer-most &lt;jaxb:bindings> customizations.
 107      */
 108     public final Set<Element> outerMostBindings = new HashSet<Element>();
 109 
 110     /**
 111      * Schema language dependent part of the processing.
 112      */
 113     protected final InternalizationLogic logic;
 114     protected final WsimportOptions options;
 115 
 116     public DOMForest(InternalizationLogic logic, @NotNull EntityResolver entityResolver, WsimportOptions options, ErrorReceiver errReceiver) {
 117         this.options = options;
 118         this.entityResolver = entityResolver;
 119         this.errorReceiver = errReceiver;
 120         this.logic = logic;
 121         try {
 122             DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
 123             dbf.setNamespaceAware(true);
 124             this.documentBuilder = dbf.newDocumentBuilder();
 125 
 126             this.parserFactory = SAXParserFactory.newInstance();
 127             this.parserFactory.setNamespaceAware(true);
 128         } catch (ParserConfigurationException e) {
 129             throw new AssertionError(e);
 130         }
 131     }
 132 
 133     public List<Element> getInlinedSchemaElement() {
 134         return inlinedSchemaElements;
 135     }
 136 
 137     public @NotNull Document parse(InputSource source, boolean root) throws SAXException, IOException {
 138         if (source.getSystemId() == null)
 139             throw new IllegalArgumentException();
 140         return parse(source.getSystemId(), source, root);
 141     }
 142 
 143     /**
 144      * Parses an XML at the given location (
 145      * and XMLs referenced by it) into DOM trees
 146      * and stores them to this forest.
 147      *
 148      * @return the parsed DOM document object.
 149      */
 150     public Document parse(String systemId, boolean root) throws SAXException, IOException{
 151 
 152         systemId = normalizeSystemId(systemId);
 153 
 154         InputSource is = null;
 155 
 156         // allow entity resolver to find the actual byte stream.
 157         is = entityResolver.resolveEntity(null, systemId);
 158         if (is == null)
 159             is = new InputSource(systemId);
 160         else {
 161             resolvedCache.put(systemId, is.getSystemId());
 162             systemId=is.getSystemId();
 163         }
 164 
 165         if (core.containsKey(systemId)) {
 166             // this document has already been parsed. Just ignore.
 167             return core.get(systemId);
 168         }
 169 
 170         if(!root)
 171             addExternalReferences(systemId);
 172 
 173         // but we still use the original system Id as the key.
 174         return parse(systemId, is, root);
 175     }
 176     protected Map<String,String> resolvedCache = new HashMap<String,String>();
 177 
 178     public Map<String,String> getReferencedEntityMap() {
 179         return resolvedCache;
 180     }
 181     /**
 182      * Parses the given document and add it to the DOM forest.
 183      *
 184      * @return null if there was a parse error. otherwise non-null.
 185      */
 186     private @NotNull Document parse(String systemId, InputSource inputSource, boolean root) throws SAXException, IOException{
 187         Document dom = documentBuilder.newDocument();
 188 
 189         systemId = normalizeSystemId(systemId);
 190 
 191         // put into the map before growing a tree, to
 192         // prevent recursive reference from causing infinite loop.
 193         core.put(systemId, dom);
 194 
 195         dom.setDocumentURI(systemId);
 196         if (root)
 197             rootDocuments.add(systemId);
 198 
 199         try {
 200             XMLReader reader = createReader(dom);
 201 
 202             InputStream is = null;
 203             if(inputSource.getByteStream() == null){
 204                 inputSource = entityResolver.resolveEntity(null, systemId);
 205             }
 206             reader.parse(inputSource);
 207             Element doc = dom.getDocumentElement();
 208             if (doc == null) {
 209                 return null;
 210             }
 211             NodeList schemas = doc.getElementsByTagNameNS(SchemaConstants.NS_XSD, "schema");
 212             for (int i = 0; i < schemas.getLength(); i++) {
 213                 inlinedSchemaElements.add((Element) schemas.item(i));
 214             }
 215         } catch (ParserConfigurationException e) {
 216             errorReceiver.error(e);
 217             throw new SAXException(e.getMessage());
 218         }
 219         resolvedCache.put(systemId, dom.getDocumentURI());
 220         return dom;
 221     }
 222 
 223     public void addExternalReferences(String ref) {
 224         if (!externalReferences.contains(ref))
 225             externalReferences.add(ref);
 226     }
 227 
 228 
 229     public Set<String> getExternalReferences() {
 230         return externalReferences;
 231     }
 232 
 233 
 234 
 235     public interface Handler extends ContentHandler {
 236         /**
 237          * Gets the DOM that was built.
 238          */
 239         public Document getDocument();
 240     }
 241 
 242     /**
 243          * Returns a {@link org.xml.sax.XMLReader} to parse a document into this DOM forest.
 244          * <p/>
 245          * This version requires that the DOM object to be created and registered
 246          * to the map beforehand.
 247          */
 248     private XMLReader createReader(Document dom) throws SAXException, ParserConfigurationException {
 249         XMLReader reader = parserFactory.newSAXParser().getXMLReader();
 250         DOMBuilder dombuilder = new DOMBuilder(dom, locatorTable, outerMostBindings);
 251         try {
 252             reader.setProperty("http://xml.org/sax/properties/lexical-handler", dombuilder);
 253         } catch(SAXException e) {
 254             errorReceiver.debug(e.getMessage());
 255         }
 256 
 257         ContentHandler handler = new WhitespaceStripper(dombuilder, errorReceiver, entityResolver);
 258         handler = new VersionChecker(handler, errorReceiver, entityResolver);
 259 
 260         // insert the reference finder so that
 261         // included/imported schemas will be also parsed
 262         XMLFilterImpl f = logic.createExternalReferenceFinder(this);
 263         f.setContentHandler(handler);
 264         if (errorReceiver != null)
 265             f.setErrorHandler(errorReceiver);
 266         f.setEntityResolver(entityResolver);
 267 
 268         reader.setContentHandler(f);
 269         if (errorReceiver != null)
 270             reader.setErrorHandler(errorReceiver);
 271         reader.setEntityResolver(entityResolver);
 272         return reader;
 273     }
 274 
 275     private String normalizeSystemId(String systemId) {
 276         try {
 277             systemId = new URI(systemId).normalize().toString();
 278         } catch (URISyntaxException e) {
 279             // leave the system ID untouched. In my experience URI is often too strict
 280         }
 281         return systemId;
 282     }
 283 
 284     boolean isExtensionMode() {
 285         return options.isExtensionMode();
 286     }
 287 
 288 
 289     /**
 290      * Gets the DOM tree associated with the specified system ID,
 291      * or null if none is found.
 292      */
 293     public Document get(String systemId) {
 294         Document doc = core.get(systemId);
 295 
 296         if (doc == null && systemId.startsWith("file:/") && !systemId.startsWith("file://")) {
 297             // As of JDK1.4, java.net.URL.toExternal method returns URLs like
 298             // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
 299             // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
 300             // and this descripancy breaks DOM look up by system ID.
 301 
 302             // this extra check solves this problem.
 303             doc = core.get("file://" + systemId.substring(5));
 304         }
 305 
 306         if (doc == null && systemId.startsWith("file:")) {
 307             // on Windows, filenames are case insensitive.
 308             // perform case-insensitive search for improved user experience
 309             String systemPath = getPath(systemId);
 310             for (String key : core.keySet()) {
 311                 if (key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) {
 312                     doc = core.get(key);
 313                     break;
 314                 }
 315             }
 316         }
 317 
 318         return doc;
 319     }
 320 
 321     /**
 322      * Strips off the leading 'file:///' portion from an URL.
 323      */
 324     private String getPath(String key) {
 325         key = key.substring(5); // skip 'file:'
 326         while (key.length() > 0 && key.charAt(0) == '/')
 327             key = key.substring(1);
 328         return key;
 329     }
 330 
 331     /**
 332      * Gets all the system IDs of the documents.
 333      */
 334     public String[] listSystemIDs() {
 335         return core.keySet().toArray(new String[core.keySet().size()]);
 336     }
 337 
 338     /**
 339      * Gets the system ID from which the given DOM is parsed.
 340      * <p/>
 341      * Poor-man's base URI.
 342      */
 343     public String getSystemId(Document dom) {
 344         for (Map.Entry<String, Document> e : core.entrySet()) {
 345             if (e.getValue() == dom)
 346                 return e.getKey();
 347         }
 348         return null;
 349     }
 350 
 351     /**
 352      * Gets the first one (which is more or less random) in {@link #rootDocuments}.
 353      */
 354     public String getFirstRootDocument() {
 355         if(rootDocuments.isEmpty()) return null;
 356         return rootDocuments.iterator().next();
 357     }
 358 
 359     public Set<String> getRootDocuments() {
 360         return rootDocuments;
 361     }
 362 
 363     /**
 364      * Dumps the contents of the forest to the specified stream.
 365      * <p/>
 366      * This is a debug method. As such, error handling is sloppy.
 367      */
 368     public void dump(OutputStream out) throws IOException {
 369         try {
 370             // create identity transformer
 371             Transformer it = TransformerFactory.newInstance().newTransformer();
 372 
 373             for (Map.Entry<String, Document> e : core.entrySet()) {
 374                 out.write(("---<< " + e.getKey() + '\n').getBytes());
 375 
 376                 DataWriter dw = new DataWriter(new OutputStreamWriter(out), null);
 377                 dw.setIndentStep("  ");
 378                 it.transform(new DOMSource(e.getValue()),
 379                         new SAXResult(dw));
 380 
 381                 out.write("\n\n\n".getBytes());
 382             }
 383         } catch (TransformerException e) {
 384             e.printStackTrace();
 385         }
 386     }
 387 
 388 }