1 /*
   2  * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.internal.xjc.reader.internalizer;
  27 
  28 import com.sun.istack.internal.NotNull;
  29 import com.sun.istack.internal.XMLStreamReaderToContentHandler;
  30 import com.sun.tools.internal.xjc.ErrorReceiver;
  31 import com.sun.tools.internal.xjc.Options;
  32 import com.sun.tools.internal.xjc.reader.Const;
  33 import com.sun.tools.internal.xjc.util.ErrorReceiverFilter;
  34 import com.sun.xml.internal.bind.marshaller.DataWriter;
  35 import com.sun.xml.internal.bind.v2.util.XmlFactory;
  36 import com.sun.xml.internal.xsom.parser.JAXPParser;
  37 import com.sun.xml.internal.xsom.parser.XMLParser;
  38 import org.w3c.dom.Document;
  39 import org.w3c.dom.Element;
  40 import org.xml.sax.*;
  41 import org.xml.sax.helpers.XMLFilterImpl;
  42 
  43 import javax.xml.parsers.DocumentBuilder;
  44 import javax.xml.parsers.DocumentBuilderFactory;
  45 import javax.xml.parsers.ParserConfigurationException;
  46 import javax.xml.parsers.SAXParserFactory;
  47 import javax.xml.stream.XMLStreamException;
  48 import javax.xml.stream.XMLStreamReader;
  49 import javax.xml.transform.Source;
  50 import javax.xml.transform.Transformer;
  51 import javax.xml.transform.TransformerException;
  52 import javax.xml.transform.TransformerFactory;
  53 import javax.xml.transform.dom.DOMSource;
  54 import javax.xml.transform.sax.SAXResult;
  55 import javax.xml.transform.sax.SAXSource;
  56 import javax.xml.validation.SchemaFactory;
  57 import java.io.IOException;
  58 import java.io.OutputStream;
  59 import java.io.OutputStreamWriter;
  60 import java.util.*;
  61 
  62 import static com.sun.xml.internal.bind.v2.util.XmlFactory.allowExternalAccess;
  63 import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
  64 
  65 
  66 /**
  67  * Builds a DOM forest and maintains association from
  68  * system IDs to DOM trees.
  69  *
  70  * <p>
 * A forest is a transitive reflexive closure of referenced documents.
 * In other words, if a document is in a forest, all the documents referenced
 * from it are in the forest, too. To support these semantics, {@link DOMForest}
 * uses {@link InternalizationLogic} to find referenced documents.
  75  *
  76  * <p>
 * Some documents are marked as "root"s, meaning they were put into
 * the forest explicitly, not because they are referenced from another
 * document. (However, a root document can be referenced from other
 * documents, too.)
  81  *
  82  * @author
  83  *     Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
  84  */
  85 public final class DOMForest {
    /**
     * Actual data storage: maps a system ID to the DOM tree registered
     * under it (map&lt;SystemId,Document>).
     */
    private final Map<String,Document> core = new HashMap<String,Document>();

    /**
     * To correctly feed documents to a schema parser, we need to remember
     * which documents (of the forest) were given as the root
     * documents, and which of them are read as included/imported
     * documents.
     *
     * <p>
     * Set of system ids as strings. Because this is a {@link LinkedHashSet},
     * iteration follows the order in which the roots were added.
     */
    private final Set<String> rootDocuments = new LinkedHashSet<String>();

    /** Stores location information for all the trees in this forest. */
    public final LocatorTable locatorTable = new LocatorTable();

    /** Stores all the outer-most &lt;jaxb:bindings> customizations. */
    public final Set<Element> outerMostBindings = new HashSet<Element>();

    /**
     * Used to resolve references to other schema documents.
     * Null until {@link #setEntityResolver(EntityResolver)} is called.
     */
    private EntityResolver entityResolver = null;

    /**
     * Errors encountered during the parsing will be sent to this object.
     * Null until {@link #setErrorHandler(ErrorReceiver)} is called.
     */
    private ErrorReceiver errorReceiver = null;

    /** Schema language dependent part of the processing. */
    protected final InternalizationLogic logic;

    /** Supplies the SAX parsers used when a document is parsed from an {@link InputSource}. */
    private final SAXParserFactory parserFactory;
    /** Creates the empty {@link Document}s that parsed content is built into. */
    private final DocumentBuilder documentBuilder;

    /**
     * Compiler options this forest was created with. This is null when the
     * forest was built through the constructor that takes a
     * {@link SAXParserFactory} and a {@link DocumentBuilder}.
     */
    private final Options options;
 119 
 120     public DOMForest(
 121         SAXParserFactory parserFactory, DocumentBuilder documentBuilder,
 122         InternalizationLogic logic ) {
 123 
 124         this.parserFactory = parserFactory;
 125         this.documentBuilder = documentBuilder;
 126         this.logic = logic;
 127         this.options = null;
 128     }
 129 
 130     public DOMForest( InternalizationLogic logic, Options opt ) {
 131 
 132         if (opt == null) throw new AssertionError("Options object null");
 133         this.options = opt;
 134 
 135         try {
 136             DocumentBuilderFactory dbf = XmlFactory.createDocumentBuilderFactory(opt.disableXmlSecurity);
 137             this.documentBuilder = dbf.newDocumentBuilder();
 138             this.parserFactory = XmlFactory.createParserFactory(opt.disableXmlSecurity);
 139         } catch( ParserConfigurationException e ) {
 140             throw new AssertionError(e);
 141         }
 142 
 143         this.logic = logic;
 144     }
 145 
 146     /**
 147      * Gets the DOM tree associated with the specified system ID,
 148      * or null if none is found.
 149      */
 150     public Document get( String systemId ) {
 151         Document doc = core.get(systemId);
 152 
 153         if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) {
 154             // As of JDK1.4, java.net.URL.toExternal method returns URLs like
 155             // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
 156             // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
 157             // and this descripancy breaks DOM look up by system ID.
 158 
 159             // this extra check solves this problem.
 160             doc = core.get( "file://"+systemId.substring(5) );
 161         }
 162 
 163         if( doc==null && systemId.startsWith("file:") ) {
 164             // on Windows, filenames are case insensitive.
 165             // perform case-insensitive search for improved user experience
 166             String systemPath = getPath(systemId);
 167             for (String key : core.keySet()) {
 168                 if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) {
 169                     doc = core.get(key);
 170                     break;
 171                 }
 172             }
 173         }
 174 
 175         return doc;
 176     }
 177 
 178     /**
 179      * Strips off the leading 'file:///' portion from an URL.
 180      */
 181     private String getPath(String key) {
 182         key = key.substring(5); // skip 'file:'
 183         while(key.length()>0 && key.charAt(0)=='/') {
 184             key = key.substring(1);
 185         }
 186         return key;
 187     }
 188 
 189     /**
 190      * Returns a read-only set of root document system IDs.
 191      */
 192     public Set<String> getRootDocuments() {
 193         return Collections.unmodifiableSet(rootDocuments);
 194     }
 195 
 196     /**
 197      * Picks one document at random and returns it.
 198      */
 199     public Document getOneDocument() {
 200         for (Document dom : core.values()) {
 201             if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
 202                 return dom;
 203         }
 204         // we should have caught this error very early on
 205         throw new AssertionError();
 206     }
 207 
 208     /**
 209      * Checks the correctness of the XML Schema documents and return true
 210      * if it's OK.
 211      *
 212      * <p>
 213      * This method performs a weaker version of the tests where error messages
 214      * are provided without line number information. So whenever possible
 215      * use {@link SchemaConstraintChecker}.
 216      *
 217      * @see SchemaConstraintChecker
 218      */
 219     public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) {
 220         try {
 221             boolean disableXmlSecurity = false;
 222             if (options != null) {
 223                 disableXmlSecurity = options.disableXmlSecurity;
 224             }
 225             SchemaFactory sf = XmlFactory.createSchemaFactory(W3C_XML_SCHEMA_NS_URI, disableXmlSecurity);
 226             ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler);
 227             sf.setErrorHandler(filter);
 228             Set<String> roots = getRootDocuments();
 229             Source[] sources = new Source[roots.size()];
 230             int i=0;
 231             for (String root : roots) {
 232                 sources[i++] = new DOMSource(get(root),root);
 233             }
 234             sf.newSchema(sources);
 235             return !filter.hadError();
 236         } catch (SAXException e) {
 237             // the errors should have been reported
 238             return false;
 239         }
 240     }
 241 
 242     /**
 243      * Gets the system ID from which the given DOM is parsed.
 244      * <p>
 245      * Poor-man's base URI.
 246      */
 247     public String getSystemId( Document dom ) {
 248         for (Map.Entry<String,Document> e : core.entrySet()) {
 249             if (e.getValue() == dom)
 250                 return e.getKey();
 251         }
 252         return null;
 253     }
 254 
 255     public Document parse( InputSource source, boolean root ) throws SAXException {
 256         if( source.getSystemId()==null )
 257             throw new IllegalArgumentException();
 258 
 259         return parse( source.getSystemId(), source, root );
 260     }
 261 
 262     /**
 263      * Parses an XML at the given location (
 264      * and XMLs referenced by it) into DOM trees
 265      * and stores them to this forest.
 266      *
 267      * @return the parsed DOM document object.
 268      */
 269     public Document parse( String systemId, boolean root ) throws SAXException, IOException {
 270 
 271         systemId = Options.normalizeSystemId(systemId);
 272 
 273         if( core.containsKey(systemId) )
 274             // this document has already been parsed. Just ignore.
 275             return core.get(systemId);
 276 
 277         InputSource is=null;
 278 
 279         // allow entity resolver to find the actual byte stream.
 280         if( entityResolver!=null )
 281             is = entityResolver.resolveEntity(null,systemId);
 282         if( is==null )
 283             is = new InputSource(systemId);
 284 
 285         // but we still use the original system Id as the key.
 286         return parse( systemId, is, root );
 287     }
 288 
 289     /**
 290      * Returns a {@link ContentHandler} to feed SAX events into.
 291      *
 292      * <p>
 293      * The client of this class can feed SAX events into the handler
 294      * to parse a document into this DOM forest.
 295      *
 296      * This version requires that the DOM object to be created and registered
 297      * to the map beforehand.
 298      */
 299     private ContentHandler getParserHandler( Document dom ) {
 300         ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings);
 301         handler = new WhitespaceStripper(handler,errorReceiver,entityResolver);
 302         handler = new VersionChecker(handler,errorReceiver,entityResolver);
 303 
 304         // insert the reference finder so that
 305         // included/imported schemas will be also parsed
 306         XMLFilterImpl f = logic.createExternalReferenceFinder(this);
 307         f.setContentHandler(handler);
 308 
 309         if(errorReceiver!=null)
 310             f.setErrorHandler(errorReceiver);
 311         if(entityResolver!=null)
 312             f.setEntityResolver(entityResolver);
 313 
 314         return f;
 315     }
 316 
 317     public interface Handler extends ContentHandler {
 318         /**
 319          * Gets the DOM that was built.
 320          */
 321         public Document getDocument();
 322     }
 323 
 324     private static abstract class HandlerImpl extends XMLFilterImpl implements Handler {
 325     }
 326 
 327     /**
 328      * Returns a {@link ContentHandler} to feed SAX events into.
 329      *
 330      * <p>
 331      * The client of this class can feed SAX events into the handler
 332      * to parse a document into this DOM forest.
 333      */
 334     public Handler getParserHandler( String systemId, boolean root ) {
 335         final Document dom = documentBuilder.newDocument();
 336         core.put( systemId, dom );
 337         if(root)
 338             rootDocuments.add(systemId);
 339 
 340         ContentHandler handler = getParserHandler(dom);
 341 
 342         // we will register the DOM to the map once the system ID becomes available.
 343         // but the SAX allows the event source to not to provide that information,
 344         // so be prepared for such case.
 345         HandlerImpl x = new HandlerImpl() {
 346             public Document getDocument() {
 347                 return dom;
 348             }
 349         };
 350         x.setContentHandler(handler);
 351 
 352         return x;
 353    }
 354 
 355     /**
 356      * Parses the given document and add it to the DOM forest.
 357      *
 358      * @return
 359      *      null if there was a parse error. otherwise non-null.
 360      */
 361     public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException {
 362         Document dom = documentBuilder.newDocument();
 363 
 364         systemId = Options.normalizeSystemId(systemId);
 365 
 366         // put into the map before growing a tree, to
 367         // prevent recursive reference from causing infinite loop.
 368         core.put( systemId, dom );
 369         if(root)
 370             rootDocuments.add(systemId);
 371 
 372         try {
 373             XMLReader reader = parserFactory.newSAXParser().getXMLReader();
 374             reader.setContentHandler(getParserHandler(dom));
 375             if(errorReceiver!=null)
 376                 reader.setErrorHandler(errorReceiver);
 377             if(entityResolver!=null)
 378                 reader.setEntityResolver(entityResolver);
 379             reader.parse(inputSource);
 380         } catch( ParserConfigurationException e ) {
 381             // in practice, this exception won't happen.
 382             errorReceiver.error(e.getMessage(),e);
 383             core.remove(systemId);
 384             rootDocuments.remove(systemId);
 385             return null;
 386         } catch( IOException e ) {
 387             errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e);
 388             core.remove(systemId);
 389             rootDocuments.remove(systemId);
 390             return null;
 391         }
 392 
 393         return dom;
 394     }
 395 
 396     public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException {
 397         Document dom = documentBuilder.newDocument();
 398 
 399         systemId = Options.normalizeSystemId(systemId);
 400 
 401         if(root)
 402             rootDocuments.add(systemId);
 403 
 404         if(systemId==null)
 405             throw new IllegalArgumentException("system id cannot be null");
 406         core.put( systemId, dom );
 407 
 408         new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge();
 409 
 410         return dom;
 411     }
 412 
 413     /**
 414      * Performs internalization.
 415      *
 416      * This method should be called only once, only after all the
 417      * schemas are parsed.
 418      *
 419      * @return
 420      *      the returned bindings need to be applied after schema
 421      *      components are built.
 422      */
 423     public SCDBasedBindingSet transform(boolean enableSCD) {
 424         return Internalizer.transform(this, enableSCD, options.disableXmlSecurity);
 425     }
 426 
 427     /**
 428      * Performs the schema correctness check by using JAXP 1.3.
 429      *
 430      * <p>
 431      * This is "weak", because {@link SchemaFactory#newSchema(Source[])}
 432      * doesn't handle inclusions very correctly (it ends up parsing it
 433      * from its original source, not in this tree), and because
 434      * it doesn't handle two documents for the same namespace very
 435      * well.
 436      *
 437      * <p>
 438      * We should eventually fix JAXP (and Xerces), but meanwhile
 439      * this weaker and potentially wrong correctness check is still
 440      * better than nothing when used inside JAX-WS (JAXB CLI and Ant
 441      * does a better job of checking this.)
 442      *
 443      * <p>
 444      * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}.
 445      */
 446     public void weakSchemaCorrectnessCheck(SchemaFactory sf) {
 447         List<SAXSource> sources = new ArrayList<SAXSource>();
 448         for( String systemId : getRootDocuments() ) {
 449             Document dom = get(systemId);
 450             if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
 451                 continue;   // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error
 452 
 453             SAXSource ss = createSAXSource(systemId);
 454             try {
 455                 ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true);
 456             } catch (SAXException e) {
 457                 throw new AssertionError(e);    // Xerces wants this. See 6395322.
 458             }
 459             sources.add(ss);
 460         }
 461 
 462         try {
 463             allowExternalAccess(sf, "file,http", options.disableXmlSecurity).newSchema(sources.toArray(new SAXSource[0]));
 464         } catch (SAXException e) {
 465             // error should have been reported.
 466         } catch (RuntimeException re) {
 467             // JAXP RI isn't very trustworthy when it comes to schema error check,
 468             // and we know some cases where it just dies with NPE. So handle it gracefully.
 469             // this masks a bug in the JAXP RI, but we need a release that we have to make.
 470             try {
 471                 sf.getErrorHandler().warning(
 472                     new SAXParseException(Messages.format(
 473                         Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,re.getMessage()),
 474                         null,null,-1,-1,re));
 475             } catch (SAXException e) {
 476                 // ignore
 477             }
 478         }
 479     }
 480 
 481     /**
 482      * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest}
 483      * (instead of parsing the original source identified by the system ID.)
 484      */
 485     public @NotNull SAXSource createSAXSource(String systemId) {
 486         ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() {
 487             // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect
 488             // handlers, since SAX allows handlers to be changed while parsing.
 489             @Override
 490             public void parse(InputSource input) throws SAXException, IOException {
 491                 createParser().parse(input, this, this, this);
 492             }
 493 
 494             @Override
 495             public void parse(String systemId) throws SAXException, IOException {
 496                 parse(new InputSource(systemId));
 497             }
 498         });
 499 
 500         return new SAXSource(reader,new InputSource(systemId));
 501     }
 502 
 503     /**
 504      * Creates {@link XMLParser} for XSOM which reads documents from
 505      * this DOMForest rather than doing a fresh parse.
 506      *
 507      * The net effect is that XSOM will read transformed XML Schemas
 508      * instead of the original documents.
 509      */
 510     public XMLParser createParser() {
 511         return new DOMForestParser(this, new JAXPParser(XmlFactory.createParserFactory(options.disableXmlSecurity)));
 512     }
 513 
    /**
     * Gets the resolver used to locate referenced documents.
     *
     * @return the current entity resolver, or null if none has been set.
     */
    public EntityResolver getEntityResolver() {
        return entityResolver;
    }
 517 
    /**
     * Sets the resolver used to locate referenced documents.
     *
     * @param entityResolver
     *      the resolver to use from now on; stored as given.
     */
    public void setEntityResolver(EntityResolver entityResolver) {
        this.entityResolver = entityResolver;
    }
 521 
    /**
     * Gets the object that receives the errors found during parsing.
     *
     * @return the current error receiver, or null if none has been set.
     */
    public ErrorReceiver getErrorHandler() {
        return errorReceiver;
    }
 525 
    /**
     * Sets the object that receives the errors found during parsing.
     *
     * @param errorHandler
     *      the receiver to use from now on; stored as given.
     */
    public void setErrorHandler(ErrorReceiver errorHandler) {
        this.errorReceiver = errorHandler;
    }
 529 
 530     /**
 531      * Gets all the parsed documents.
 532      */
 533     public Document[] listDocuments() {
 534         return core.values().toArray(new Document[core.size()]);
 535     }
 536 
 537     /**
 538      * Gets all the system IDs of the documents.
 539      */
 540     public String[] listSystemIDs() {
 541         return core.keySet().toArray(new String[core.keySet().size()]);
 542     }
 543 
 544     /**
 545      * Dumps the contents of the forest to the specified stream.
 546      *
 547      * This is a debug method. As such, error handling is sloppy.
 548      */
 549     @SuppressWarnings("CallToThreadDumpStack")
 550     public void dump( OutputStream out ) throws IOException {
 551         try {
 552             // create identity transformer
 553             boolean disableXmlSecurity = false;
 554             if (options != null) {
 555                 disableXmlSecurity = options.disableXmlSecurity;
 556             }
 557             TransformerFactory tf = XmlFactory.createTransformerFactory(disableXmlSecurity);
 558             Transformer it = tf.newTransformer();
 559 
 560             for (Map.Entry<String, Document> e : core.entrySet()) {
 561                 out.write( ("---<< "+e.getKey()+'\n').getBytes() );
 562 
 563                 DataWriter dw = new DataWriter(new OutputStreamWriter(out),null);
 564                 dw.setIndentStep("  ");
 565                 it.transform( new DOMSource(e.getValue()),
 566                     new SAXResult(dw));
 567 
 568                 out.write( "\n\n\n".getBytes() );
 569             }
 570         } catch( TransformerException e ) {
 571             e.printStackTrace();
 572         }
 573     }
 574 }