1 /*
   2  * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.internal.xjc.reader.internalizer;
  27 
  28 import com.sun.istack.internal.NotNull;
  29 import com.sun.istack.internal.XMLStreamReaderToContentHandler;
  30 import com.sun.tools.internal.xjc.ErrorReceiver;
  31 import com.sun.tools.internal.xjc.Options;
  32 import com.sun.tools.internal.xjc.reader.Const;
  33 import com.sun.tools.internal.xjc.reader.xmlschema.parser.SchemaConstraintChecker;
  34 import com.sun.tools.internal.xjc.util.ErrorReceiverFilter;
  35 import com.sun.xml.internal.bind.marshaller.DataWriter;
  36 import com.sun.xml.internal.bind.v2.util.XmlFactory;
  37 import com.sun.xml.internal.xsom.parser.JAXPParser;
  38 import com.sun.xml.internal.xsom.parser.XMLParser;
  39 import org.w3c.dom.Document;
  40 import org.w3c.dom.Element;
  41 import org.xml.sax.*;
  42 import org.xml.sax.helpers.XMLFilterImpl;
  43 
  44 import javax.xml.parsers.DocumentBuilder;
  45 import javax.xml.parsers.DocumentBuilderFactory;
  46 import javax.xml.parsers.ParserConfigurationException;
  47 import javax.xml.parsers.SAXParserFactory;
  48 import javax.xml.stream.XMLStreamException;
  49 import javax.xml.stream.XMLStreamReader;
  50 import javax.xml.transform.Source;
  51 import javax.xml.transform.Transformer;
  52 import javax.xml.transform.TransformerException;
  53 import javax.xml.transform.TransformerFactory;
  54 import javax.xml.transform.dom.DOMSource;
  55 import javax.xml.transform.sax.SAXResult;
  56 import javax.xml.transform.sax.SAXSource;
  57 import javax.xml.validation.SchemaFactory;
  58 import java.io.IOException;
  59 import java.io.OutputStream;
  60 import java.io.OutputStreamWriter;
  61 import java.util.*;
  62 
  63 import static com.sun.xml.internal.bind.v2.util.XmlFactory.allowExternalAccess;
  64 import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
  65 
  66 
  67 /**
  68  * Builds a DOM forest and maintains association from
  69  * system IDs to DOM trees.
  70  *
  71  * <p>
   * A forest is the reflexive transitive closure of referenced documents.
   * In other words, if a document is in a forest, all the documents referenced
   * from it are in the forest, too. To support these semantics, {@link DOMForest}
   * uses {@link InternalizationLogic} to find referenced documents.
  76  *
  77  * <p>
  78  * Some documents are marked as "root"s, meaning those documents were
  79  * put into a forest explicitly, not because it is referenced from another
  80  * document. (However, a root document can be referenced from other
  81  * documents, too.)
  82  *
  83  * @author
  84  *     Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
  85  */
  86 public final class DOMForest {
  87     /** actual data storage {@code map<SystemId,Document>}. */
  88     private final Map<String,Document> core = new LinkedHashMap<>();
  89 
  90     /**
  91      * To correctly feed documents to a schema parser, we need to remember
  92      * which documents (of the forest) were given as the root
  93      * documents, and which of them are read as included/imported
  94      * documents.
  95      *
  96      * <p>
  97      * Set of system ids as strings.
  98      */
  99     private final Set<String> rootDocuments = new LinkedHashSet<String>();
 100 
 101     /** Stores location information for all the trees in this forest. */
 102     public final LocatorTable locatorTable = new LocatorTable();
 103 
 104     /** Stores all the outer-most {@code <jaxb:bindings>} customizations. */
 105     public final Set<Element> outerMostBindings = new HashSet<Element>();
 106 
 107     /** Used to resolve references to other schema documents. */
 108     private EntityResolver entityResolver = null;
 109 
 110     /** Errors encountered during the parsing will be sent to this object. */
 111     private ErrorReceiver errorReceiver = null;
 112 
 113     /** Schema language dependent part of the processing. */
 114     protected final InternalizationLogic logic;
 115 
 116     private final SAXParserFactory parserFactory;
 117     private final DocumentBuilder documentBuilder;
 118 
 119     private final Options options;
 120 
 121     public DOMForest(
 122         SAXParserFactory parserFactory, DocumentBuilder documentBuilder,
 123         InternalizationLogic logic ) {
 124 
 125         this.parserFactory = parserFactory;
 126         this.documentBuilder = documentBuilder;
 127         this.logic = logic;
 128         this.options = null;
 129     }
 130 
 131     public DOMForest( InternalizationLogic logic, Options opt ) {
 132 
 133         if (opt == null) throw new AssertionError("Options object null");
 134         this.options = opt;
 135 
 136         try {
 137             DocumentBuilderFactory dbf = XmlFactory.createDocumentBuilderFactory(opt.disableXmlSecurity);
 138             this.documentBuilder = dbf.newDocumentBuilder();
 139             this.parserFactory = XmlFactory.createParserFactory(opt.disableXmlSecurity);
 140         } catch( ParserConfigurationException e ) {
 141             throw new AssertionError(e);
 142         }
 143 
 144         this.logic = logic;
 145     }
 146 
 147     /**
 148      * Gets the DOM tree associated with the specified system ID,
 149      * or null if none is found.
 150      */
 151     public Document get( String systemId ) {
 152         Document doc = core.get(systemId);
 153 
 154         if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) {
 155             // As of JDK1.4, java.net.URL.toExternal method returns URLs like
 156             // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
 157             // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
 158             // and this descripancy breaks DOM look up by system ID.
 159 
 160             // this extra check solves this problem.
 161             doc = core.get( "file://"+systemId.substring(5) );
 162         }
 163 
 164         if( doc==null && systemId.startsWith("file:") ) {
 165             // on Windows, filenames are case insensitive.
 166             // perform case-insensitive search for improved user experience
 167             String systemPath = getPath(systemId);
 168             for (String key : core.keySet()) {
 169                 if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) {
 170                     doc = core.get(key);
 171                     break;
 172                 }
 173             }
 174         }
 175 
 176         return doc;
 177     }
 178 
 179     /**
 180      * Strips off the leading 'file:///' portion from an URL.
 181      */
 182     private String getPath(String key) {
 183         key = key.substring(5); // skip 'file:'
 184         while(key.length()>0 && key.charAt(0)=='/') {
 185             key = key.substring(1);
 186         }
 187         return key;
 188     }
 189 
 190     /**
 191      * Returns a read-only set of root document system IDs.
 192      */
 193     public Set<String> getRootDocuments() {
 194         return Collections.unmodifiableSet(rootDocuments);
 195     }
 196 
 197     /**
 198      * Picks one document at random and returns it.
 199      */
 200     public Document getOneDocument() {
 201         for (Document dom : core.values()) {
 202             if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
 203                 return dom;
 204         }
 205         // we should have caught this error very early on
 206         throw new AssertionError();
 207     }
 208 
 209     /**
 210      * Checks the correctness of the XML Schema documents and return true
 211      * if it's OK.
 212      *
 213      * <p>
 214      * This method performs a weaker version of the tests where error messages
 215      * are provided without line number information. So whenever possible
 216      * use {@link SchemaConstraintChecker}.
 217      *
 218      * @see SchemaConstraintChecker
 219      */
 220     public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) {
 221         try {
 222             boolean disableXmlSecurity = false;
 223             if (options != null) {
 224                 disableXmlSecurity = options.disableXmlSecurity;
 225             }
 226             SchemaFactory sf = XmlFactory.createSchemaFactory(W3C_XML_SCHEMA_NS_URI, disableXmlSecurity);
 227             ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler);
 228             sf.setErrorHandler(filter);
 229             Set<String> roots = getRootDocuments();
 230             Source[] sources = new Source[roots.size()];
 231             int i=0;
 232             for (String root : roots) {
 233                 sources[i++] = new DOMSource(get(root),root);
 234             }
 235             sf.newSchema(sources);
 236             return !filter.hadError();
 237         } catch (SAXException e) {
 238             // the errors should have been reported
 239             return false;
 240         }
 241     }
 242 
 243     /**
 244      * Gets the system ID from which the given DOM is parsed.
 245      * <p>
 246      * Poor-man's base URI.
 247      */
 248     public String getSystemId( Document dom ) {
 249         for (Map.Entry<String,Document> e : core.entrySet()) {
 250             if (e.getValue() == dom)
 251                 return e.getKey();
 252         }
 253         return null;
 254     }
 255 
 256     public Document parse( InputSource source, boolean root ) throws SAXException {
 257         if( source.getSystemId()==null )
 258             throw new IllegalArgumentException();
 259 
 260         return parse( source.getSystemId(), source, root );
 261     }
 262 
 263     /**
 264      * Parses an XML at the given location (
 265      * and XMLs referenced by it) into DOM trees
 266      * and stores them to this forest.
 267      *
 268      * @return the parsed DOM document object.
 269      */
 270     public Document parse( String systemId, boolean root ) throws SAXException, IOException {
 271 
 272         systemId = Options.normalizeSystemId(systemId);
 273 
 274         if( core.containsKey(systemId) )
 275             // this document has already been parsed. Just ignore.
 276             return core.get(systemId);
 277 
 278         InputSource is=null;
 279 
 280         // allow entity resolver to find the actual byte stream.
 281         if( entityResolver!=null )
 282             is = entityResolver.resolveEntity(null,systemId);
 283         if( is==null )
 284             is = new InputSource(systemId);
 285 
 286         // but we still use the original system Id as the key.
 287         return parse( systemId, is, root );
 288     }
 289 
 290     /**
 291      * Returns a {@link ContentHandler} to feed SAX events into.
 292      *
 293      * <p>
 294      * The client of this class can feed SAX events into the handler
 295      * to parse a document into this DOM forest.
 296      *
 297      * This version requires that the DOM object to be created and registered
 298      * to the map beforehand.
 299      */
 300     private ContentHandler getParserHandler( Document dom ) {
 301         ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings);
 302         handler = new WhitespaceStripper(handler,errorReceiver,entityResolver);
 303         handler = new VersionChecker(handler,errorReceiver,entityResolver);
 304 
 305         // insert the reference finder so that
 306         // included/imported schemas will be also parsed
 307         XMLFilterImpl f = logic.createExternalReferenceFinder(this);
 308         f.setContentHandler(handler);
 309 
 310         if(errorReceiver!=null)
 311             f.setErrorHandler(errorReceiver);
 312         if(entityResolver!=null)
 313             f.setEntityResolver(entityResolver);
 314 
 315         return f;
 316     }
 317 
 318     public interface Handler extends ContentHandler {
 319         /**
 320          * Gets the DOM that was built.
 321          */
 322         public Document getDocument();
 323     }
 324 
 325     private static abstract class HandlerImpl extends XMLFilterImpl implements Handler {
 326     }
 327 
 328     /**
 329      * Returns a {@link ContentHandler} to feed SAX events into.
 330      *
 331      * <p>
 332      * The client of this class can feed SAX events into the handler
 333      * to parse a document into this DOM forest.
 334      */
 335     public Handler getParserHandler( String systemId, boolean root ) {
 336         final Document dom = documentBuilder.newDocument();
 337         core.put( systemId, dom );
 338         if(root)
 339             rootDocuments.add(systemId);
 340 
 341         ContentHandler handler = getParserHandler(dom);
 342 
 343         // we will register the DOM to the map once the system ID becomes available.
 344         // but the SAX allows the event source to not to provide that information,
 345         // so be prepared for such case.
 346         HandlerImpl x = new HandlerImpl() {
 347             public Document getDocument() {
 348                 return dom;
 349             }
 350         };
 351         x.setContentHandler(handler);
 352 
 353         return x;
 354    }
 355 
 356     /**
 357      * Parses the given document and add it to the DOM forest.
 358      *
 359      * @return
 360      *      null if there was a parse error. otherwise non-null.
 361      */
 362     public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException {
 363         Document dom = documentBuilder.newDocument();
 364 
 365         systemId = Options.normalizeSystemId(systemId);
 366 
 367         // put into the map before growing a tree, to
 368         // prevent recursive reference from causing infinite loop.
 369         core.put( systemId, dom );
 370         if(root)
 371             rootDocuments.add(systemId);
 372 
 373         try {
 374             XMLReader reader = parserFactory.newSAXParser().getXMLReader();
 375             reader.setContentHandler(getParserHandler(dom));
 376             if(errorReceiver!=null)
 377                 reader.setErrorHandler(errorReceiver);
 378             if(entityResolver!=null)
 379                 reader.setEntityResolver(entityResolver);
 380             reader.parse(inputSource);
 381         } catch( ParserConfigurationException e ) {
 382             // in practice, this exception won't happen.
 383             errorReceiver.error(e.getMessage(),e);
 384             core.remove(systemId);
 385             rootDocuments.remove(systemId);
 386             return null;
 387         } catch( IOException e ) {
 388             errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e);
 389             core.remove(systemId);
 390             rootDocuments.remove(systemId);
 391             return null;
 392         }
 393 
 394         return dom;
 395     }
 396 
 397     public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException {
 398         Document dom = documentBuilder.newDocument();
 399 
 400         systemId = Options.normalizeSystemId(systemId);
 401 
 402         if(root)
 403             rootDocuments.add(systemId);
 404 
 405         if(systemId==null)
 406             throw new IllegalArgumentException("system id cannot be null");
 407         core.put( systemId, dom );
 408 
 409         new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge();
 410 
 411         return dom;
 412     }
 413 
 414     /**
 415      * Performs internalization.
 416      *
 417      * This method should be called only once, only after all the
 418      * schemas are parsed.
 419      *
 420      * @return
 421      *      the returned bindings need to be applied after schema
 422      *      components are built.
 423      */
 424     public SCDBasedBindingSet transform(boolean enableSCD) {
 425         return Internalizer.transform(this, enableSCD, options.disableXmlSecurity);
 426     }
 427 
 428     /**
 429      * Performs the schema correctness check by using JAXP 1.3.
 430      *
 431      * <p>
 432      * This is "weak", because {@link SchemaFactory#newSchema(Source[])}
 433      * doesn't handle inclusions very correctly (it ends up parsing it
 434      * from its original source, not in this tree), and because
 435      * it doesn't handle two documents for the same namespace very
 436      * well.
 437      *
 438      * <p>
 439      * We should eventually fix JAXP (and Xerces), but meanwhile
 440      * this weaker and potentially wrong correctness check is still
 441      * better than nothing when used inside JAX-WS (JAXB CLI and Ant
 442      * does a better job of checking this.)
 443      *
 444      * <p>
 445      * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}.
 446      */
 447     public void weakSchemaCorrectnessCheck(SchemaFactory sf) {
 448         List<SAXSource> sources = new ArrayList<SAXSource>();
 449         for( String systemId : getRootDocuments() ) {
 450             Document dom = get(systemId);
 451             if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
 452                 continue;   // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error
 453 
 454             SAXSource ss = createSAXSource(systemId);
 455             try {
 456                 ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true);
 457             } catch (SAXException e) {
 458                 throw new AssertionError(e);    // Xerces wants this. See 6395322.
 459             }
 460             sources.add(ss);
 461         }
 462 
 463         try {
 464             allowExternalAccess(sf, "file,http", options.disableXmlSecurity).newSchema(sources.toArray(new SAXSource[0]));
 465         } catch (SAXException e) {
 466             // error should have been reported.
 467         } catch (RuntimeException re) {
 468             // JAXP RI isn't very trustworthy when it comes to schema error check,
 469             // and we know some cases where it just dies with NPE. So handle it gracefully.
 470             // this masks a bug in the JAXP RI, but we need a release that we have to make.
 471             try {
 472                 sf.getErrorHandler().warning(
 473                     new SAXParseException(Messages.format(
 474                         Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,re.getMessage()),
 475                         null,null,-1,-1,re));
 476             } catch (SAXException e) {
 477                 // ignore
 478             }
 479         }
 480     }
 481 
 482     /**
 483      * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest}
 484      * (instead of parsing the original source identified by the system ID.)
 485      */
 486     public @NotNull SAXSource createSAXSource(String systemId) {
 487         ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() {
 488             // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect
 489             // handlers, since SAX allows handlers to be changed while parsing.
 490             @Override
 491             public void parse(InputSource input) throws SAXException, IOException {
 492                 createParser().parse(input, this, this, this);
 493             }
 494 
 495             @Override
 496             public void parse(String systemId) throws SAXException, IOException {
 497                 parse(new InputSource(systemId));
 498             }
 499         });
 500 
 501         return new SAXSource(reader,new InputSource(systemId));
 502     }
 503 
 504     /**
 505      * Creates {@link XMLParser} for XSOM which reads documents from
 506      * this DOMForest rather than doing a fresh parse.
 507      *
 508      * The net effect is that XSOM will read transformed XML Schemas
 509      * instead of the original documents.
 510      */
 511     public XMLParser createParser() {
 512         return new DOMForestParser(this, new JAXPParser(XmlFactory.createParserFactory(options.disableXmlSecurity)));
 513     }
 514 
 515     public EntityResolver getEntityResolver() {
 516         return entityResolver;
 517     }
 518 
 519     public void setEntityResolver(EntityResolver entityResolver) {
 520         this.entityResolver = entityResolver;
 521     }
 522 
 523     public ErrorReceiver getErrorHandler() {
 524         return errorReceiver;
 525     }
 526 
 527     public void setErrorHandler(ErrorReceiver errorHandler) {
 528         this.errorReceiver = errorHandler;
 529     }
 530 
 531     /**
 532      * Gets all the parsed documents.
 533      */
 534     public Document[] listDocuments() {
 535         return core.values().toArray(new Document[core.size()]);
 536     }
 537 
 538     /**
 539      * Gets all the system IDs of the documents.
 540      */
 541     public String[] listSystemIDs() {
 542         return core.keySet().toArray(new String[core.keySet().size()]);
 543     }
 544 
 545     /**
 546      * Dumps the contents of the forest to the specified stream.
 547      *
 548      * This is a debug method. As such, error handling is sloppy.
 549      */
 550     @SuppressWarnings("CallToThreadDumpStack")
 551     public void dump( OutputStream out ) throws IOException {
 552         try {
 553             // create identity transformer
 554             boolean disableXmlSecurity = false;
 555             if (options != null) {
 556                 disableXmlSecurity = options.disableXmlSecurity;
 557             }
 558             TransformerFactory tf = XmlFactory.createTransformerFactory(disableXmlSecurity);
 559             Transformer it = tf.newTransformer();
 560 
 561             for (Map.Entry<String, Document> e : core.entrySet()) {
 562                 out.write( ("---<< "+e.getKey()+'\n').getBytes() );
 563 
 564                 DataWriter dw = new DataWriter(new OutputStreamWriter(out),null);
 565                 dw.setIndentStep("  ");
 566                 it.transform( new DOMSource(e.getValue()),
 567                     new SAXResult(dw));
 568 
 569                 out.write( "\n\n\n".getBytes() );
 570             }
 571         } catch( TransformerException e ) {
 572             e.printStackTrace();
 573         }
 574     }
 575 }