1 /* 2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.tools.internal.ws.wsdl.parser; 27 28 import com.sun.istack.internal.NotNull; 29 import com.sun.tools.internal.ws.util.xml.XmlUtil; 30 import com.sun.tools.internal.ws.wscompile.ErrorReceiver; 31 import com.sun.tools.internal.ws.wscompile.WsimportOptions; 32 import com.sun.tools.internal.ws.wsdl.document.schema.SchemaConstants; 33 import com.sun.tools.internal.xjc.reader.internalizer.LocatorTable; 34 import com.sun.xml.internal.bind.marshaller.DataWriter; 35 import org.w3c.dom.Document; 36 import org.w3c.dom.Element; 37 import org.w3c.dom.NodeList; 38 import org.xml.sax.ContentHandler; 39 import org.xml.sax.*; 40 import org.xml.sax.helpers.XMLFilterImpl; 41 42 import javax.xml.parsers.DocumentBuilder; 43 import javax.xml.parsers.DocumentBuilderFactory; 44 import javax.xml.parsers.ParserConfigurationException; 45 import javax.xml.parsers.SAXParserFactory; 46 import javax.xml.transform.Transformer; 47 import javax.xml.transform.TransformerException; 48 import javax.xml.transform.TransformerFactory; 49 import javax.xml.transform.dom.DOMSource; 50 import javax.xml.transform.sax.SAXResult; 51 import java.io.IOException; 52 import java.io.InputStream; 53 import java.io.OutputStream; 54 import java.io.OutputStreamWriter; 55 import java.net.*; 56 import java.util.*; 57 58 /** 59 * @author Vivek Pandey 60 */ 61 public class DOMForest { 62 /** 63 * To correctly feed documents to a schema parser, we need to remember 64 * which documents (of the forest) were given as the root 65 * documents, and which of them are read as included/imported 66 * documents. 67 * <p/> 68 * <p/> 69 * Set of system ids as strings. 70 */ 71 protected final Set<String> rootDocuments = new HashSet<String>(); 72 73 /** 74 * Contains wsdl:import(s) 75 */ 76 protected final Set<String> externalReferences = new HashSet<String>(); 77 78 /** 79 * actual data storage map<SystemId,Document>. 80 */ 81 protected final Map<String, Document> core = new HashMap<String, Document>(); 82 protected final ErrorReceiver errorReceiver; 83 84 private final DocumentBuilder documentBuilder; 85 private final SAXParserFactory parserFactory; 86 87 /** 88 * inlined schema elements inside wsdl:type section 89 */ 90 protected final List<Element> inlinedSchemaElements = new ArrayList<Element>(); 91 92 93 /** 94 * Stores location information for all the trees in this forest. 95 */ 96 public final LocatorTable locatorTable = new LocatorTable(); 97 98 protected final EntityResolver entityResolver; 99 /** 100 * Stores all the outer-most <jaxb:bindings> customizations. 101 */ 102 public final Set<Element> outerMostBindings = new HashSet<Element>(); 103 104 /** 105 * Schema language dependent part of the processing. 106 */ 107 protected final InternalizationLogic logic; 108 protected final WsimportOptions options; 109 110 public DOMForest(InternalizationLogic logic, @NotNull EntityResolver entityResolver, WsimportOptions options, ErrorReceiver errReceiver) { 111 this.options = options; 112 this.entityResolver = entityResolver; 113 this.errorReceiver = errReceiver; 114 this.logic = logic; 115 try { 116 // secure xml processing can be switched off if input requires it 117 boolean secureProcessingEnabled = options == null || !options.disableXmlSecurity; 118 DocumentBuilderFactory dbf = XmlUtil.newDocumentBuilderFactory(!secureProcessingEnabled); 119 dbf.setNamespaceAware(true); 120 this.documentBuilder = dbf.newDocumentBuilder(); 121 122 this.parserFactory = XmlUtil.newSAXParserFactory(secureProcessingEnabled); 123 this.parserFactory.setNamespaceAware(true); 124 125 if(secureProcessingEnabled){ 126 dbf.setExpandEntityReferences(false); 127 try { 128 parserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 129 parserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false); 130 parserFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 131 } catch (SAXNotRecognizedException e){ 132 throw new ParserConfigurationException(e.getMessage()); 133 } catch (SAXNotSupportedException e) { 134 throw new ParserConfigurationException(e.getMessage()); 135 } 136 } 137 138 } catch (ParserConfigurationException e) { 139 throw new AssertionError(e); 140 } 141 } 142 143 public List<Element> getInlinedSchemaElement() { 144 return inlinedSchemaElements; 145 } 146 147 public @NotNull Document parse(InputSource source, boolean root) throws SAXException, IOException { 148 if (source.getSystemId() == null) 149 throw new IllegalArgumentException(); 150 return parse(source.getSystemId(), source, root); 151 } 152 153 /** 154 * Parses an XML at the given location ( 155 * and XMLs referenced by it) into DOM trees 156 * and stores them to this forest. 157 * 158 * @return the parsed DOM document object. 159 */ 160 public Document parse(String systemId, boolean root) throws SAXException, IOException{ 161 162 systemId = normalizeSystemId(systemId); 163 164 InputSource is = null; 165 166 // allow entity resolver to find the actual byte stream. 167 is = entityResolver.resolveEntity(null, systemId); 168 if (is == null) 169 is = new InputSource(systemId); 170 else { 171 resolvedCache.put(systemId, is.getSystemId()); 172 systemId=is.getSystemId(); 173 } 174 175 if (core.containsKey(systemId)) { 176 // this document has already been parsed. Just ignore. 177 return core.get(systemId); 178 } 179 180 if(!root) 181 addExternalReferences(systemId); 182 183 // but we still use the original system Id as the key. 184 return parse(systemId, is, root); 185 } 186 protected Map<String,String> resolvedCache = new HashMap<String,String>(); 187 188 public Map<String,String> getReferencedEntityMap() { 189 return resolvedCache; 190 } 191 /** 192 * Parses the given document and add it to the DOM forest. 193 * 194 * @return null if there was a parse error. otherwise non-null. 195 */ 196 private @NotNull Document parse(String systemId, InputSource inputSource, boolean root) throws SAXException, IOException{ 197 Document dom = documentBuilder.newDocument(); 198 199 systemId = normalizeSystemId(systemId); 200 201 // put into the map before growing a tree, to 202 // prevent recursive reference from causing infinite loop. 203 core.put(systemId, dom); 204 205 dom.setDocumentURI(systemId); 206 if (root) 207 rootDocuments.add(systemId); 208 209 try { 210 XMLReader reader = createReader(dom); 211 212 InputStream is = null; 213 if(inputSource.getByteStream() == null){ 214 inputSource = entityResolver.resolveEntity(null, systemId); 215 } 216 reader.parse(inputSource); 217 Element doc = dom.getDocumentElement(); 218 if (doc == null) { 219 return null; 220 } 221 NodeList schemas = doc.getElementsByTagNameNS(SchemaConstants.NS_XSD, "schema"); 222 for (int i = 0; i < schemas.getLength(); i++) { 223 inlinedSchemaElements.add((Element) schemas.item(i)); 224 } 225 } catch (ParserConfigurationException e) { 226 errorReceiver.error(e); 227 throw new SAXException(e.getMessage()); 228 } 229 resolvedCache.put(systemId, dom.getDocumentURI()); 230 return dom; 231 } 232 233 public void addExternalReferences(String ref) { 234 if (!externalReferences.contains(ref)) 235 externalReferences.add(ref); 236 } 237 238 239 public Set<String> getExternalReferences() { 240 return externalReferences; 241 } 242 243 244 245 public interface Handler extends ContentHandler { 246 /** 247 * Gets the DOM that was built. 248 */ 249 public Document getDocument(); 250 } 251 252 /** 253 * Returns a {@link org.xml.sax.XMLReader} to parse a document into this DOM forest. 254 * <p/> 255 * This version requires that the DOM object to be created and registered 256 * to the map beforehand. 257 */ 258 private XMLReader createReader(Document dom) throws SAXException, ParserConfigurationException { 259 XMLReader reader = parserFactory.newSAXParser().getXMLReader(); 260 DOMBuilder dombuilder = new DOMBuilder(dom, locatorTable, outerMostBindings); 261 try { 262 reader.setProperty("http://xml.org/sax/properties/lexical-handler", dombuilder); 263 } catch(SAXException e) { 264 errorReceiver.debug(e.getMessage()); 265 } 266 267 ContentHandler handler = new WhitespaceStripper(dombuilder, errorReceiver, entityResolver); 268 handler = new VersionChecker(handler, errorReceiver, entityResolver); 269 270 // insert the reference finder so that 271 // included/imported schemas will be also parsed 272 XMLFilterImpl f = logic.createExternalReferenceFinder(this); 273 f.setContentHandler(handler); 274 if (errorReceiver != null) 275 f.setErrorHandler(errorReceiver); 276 f.setEntityResolver(entityResolver); 277 278 reader.setContentHandler(f); 279 if (errorReceiver != null) 280 reader.setErrorHandler(errorReceiver); 281 reader.setEntityResolver(entityResolver); 282 return reader; 283 } 284 285 private String normalizeSystemId(String systemId) { 286 try { 287 systemId = new URI(systemId).normalize().toString(); 288 } catch (URISyntaxException e) { 289 // leave the system ID untouched. In my experience URI is often too strict 290 } 291 return systemId; 292 } 293 294 boolean isExtensionMode() { 295 return options.isExtensionMode(); 296 } 297 298 299 /** 300 * Gets the DOM tree associated with the specified system ID, 301 * or null if none is found. 302 */ 303 public Document get(String systemId) { 304 Document doc = core.get(systemId); 305 306 if (doc == null && systemId.startsWith("file:/") && !systemId.startsWith("file://")) { 307 // As of JDK1.4, java.net.URL.toExternal method returns URLs like 308 // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738. 309 // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"), 310 // and this descripancy breaks DOM look up by system ID. 311 312 // this extra check solves this problem. 313 doc = core.get("file://" + systemId.substring(5)); 314 } 315 316 if (doc == null && systemId.startsWith("file:")) { 317 // on Windows, filenames are case insensitive. 318 // perform case-insensitive search for improved user experience 319 String systemPath = getPath(systemId); 320 for (String key : core.keySet()) { 321 if (key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) { 322 doc = core.get(key); 323 break; 324 } 325 } 326 } 327 328 return doc; 329 } 330 331 /** 332 * Strips off the leading 'file:///' portion from an URL. 333 */ 334 private String getPath(String key) { 335 key = key.substring(5); // skip 'file:' 336 while (key.length() > 0 && key.charAt(0) == '/') 337 key = key.substring(1); 338 return key; 339 } 340 341 /** 342 * Gets all the system IDs of the documents. 343 */ 344 public String[] listSystemIDs() { 345 return core.keySet().toArray(new String[core.keySet().size()]); 346 } 347 348 /** 349 * Gets the system ID from which the given DOM is parsed. 350 * <p/> 351 * Poor-man's base URI. 352 */ 353 public String getSystemId(Document dom) { 354 for (Map.Entry<String, Document> e : core.entrySet()) { 355 if (e.getValue() == dom) 356 return e.getKey(); 357 } 358 return null; 359 } 360 361 /** 362 * Gets the first one (which is more or less random) in {@link #rootDocuments}. 363 */ 364 public String getFirstRootDocument() { 365 if(rootDocuments.isEmpty()) return null; 366 return rootDocuments.iterator().next(); 367 } 368 369 public Set<String> getRootDocuments() { 370 return rootDocuments; 371 } 372 373 /** 374 * Dumps the contents of the forest to the specified stream. 375 * <p/> 376 * This is a debug method. As such, error handling is sloppy. 377 */ 378 public void dump(OutputStream out) throws IOException { 379 try { 380 // create identity transformer 381 // secure xml processing can be switched off if input requires it 382 boolean secureProcessingEnabled = options == null || !options.disableXmlSecurity; 383 TransformerFactory tf = XmlUtil.newTransformerFactory(secureProcessingEnabled); 384 Transformer it = tf.newTransformer(); 385 386 for (Map.Entry<String, Document> e : core.entrySet()) { 387 out.write(("---<< " + e.getKey() + '\n').getBytes()); 388 389 DataWriter dw = new DataWriter(new OutputStreamWriter(out), null); 390 dw.setIndentStep(" "); 391 it.transform(new DOMSource(e.getValue()), 392 new SAXResult(dw)); 393 394 out.write("\n\n\n".getBytes()); 395 } 396 } catch (TransformerException e) { 397 e.printStackTrace(); 398 } 399 } 400 401 }