1 /*
   2  * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.internal.xjc.reader.internalizer;
  27 
  28 import java.io.IOException;
  29 import java.io.OutputStream;
  30 import java.io.OutputStreamWriter;
  31 import java.util.ArrayList;
  32 import java.util.Collections;
  33 import java.util.HashMap;
  34 import java.util.HashSet;
  35 import java.util.List;
  36 import java.util.Map;
  37 import java.util.Set;
  38 
  39 import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
  40 import javax.xml.parsers.DocumentBuilder;
  41 import javax.xml.parsers.DocumentBuilderFactory;
  42 import javax.xml.parsers.ParserConfigurationException;
  43 import javax.xml.parsers.SAXParserFactory;
  44 import javax.xml.stream.XMLStreamException;
  45 import javax.xml.stream.XMLStreamReader;
  46 import javax.xml.transform.Source;
  47 import javax.xml.transform.Transformer;
  48 import javax.xml.transform.TransformerException;
  49 import javax.xml.transform.TransformerFactory;
  50 import javax.xml.transform.dom.DOMSource;
  51 import javax.xml.transform.sax.SAXResult;
  52 import javax.xml.transform.sax.SAXSource;
  53 import javax.xml.validation.SchemaFactory;
  54 
  55 import com.sun.istack.internal.NotNull;
  56 import com.sun.istack.internal.XMLStreamReaderToContentHandler;
  57 import com.sun.tools.internal.xjc.ErrorReceiver;
  58 import com.sun.tools.internal.xjc.Options;
  59 import com.sun.tools.internal.xjc.reader.Const;
  60 import com.sun.tools.internal.xjc.util.ErrorReceiverFilter;
  61 import com.sun.xml.internal.bind.marshaller.DataWriter;
  62 import com.sun.xml.internal.bind.v2.util.XmlFactory;
  63 import com.sun.xml.internal.xsom.parser.JAXPParser;
  64 import com.sun.xml.internal.xsom.parser.XMLParser;
  65 
  66 import org.w3c.dom.Document;
  67 import org.w3c.dom.Element;
  68 import org.xml.sax.ContentHandler;
  69 import org.xml.sax.EntityResolver;
  70 import org.xml.sax.InputSource;
  71 import org.xml.sax.SAXException;
  72 import org.xml.sax.SAXParseException;
  73 import org.xml.sax.XMLReader;
  74 import org.xml.sax.helpers.XMLFilterImpl;
  75 
  76 
  77 /**
  78  * Builds a DOM forest and maintains association from
  79  * system IDs to DOM trees.
  80  *
  81  * <p>
 * A forest is a transitive reflexive closure of referenced documents.
 * In other words, if a document is in a forest, all the documents referenced
 * from it are in the forest, too. To support these semantics, {@link DOMForest}
 * uses {@link InternalizationLogic} to find referenced documents.
  86  *
  87  * <p>
 * Some documents are marked as "root"s, meaning those documents were
 * put into a forest explicitly, not because they are referenced from another
 * document. (However, a root document can be referenced from other
 * documents, too.)
  92  *
  93  * @author
  94  *     Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
  95  */
  96 public final class DOMForest {
    /** Actual data storage: maps a system ID to the DOM tree registered under it. */
    private final Map<String,Document> core = new HashMap<String,Document>();

    /**
     * To correctly feed documents to a schema parser, we need to remember
     * which documents (of the forest) were given as the root
     * documents, and which of them are read as included/imported
     * documents.
     *
     * <p>
     * Set of system ids as strings.
     */
    private final Set<String> rootDocuments = new HashSet<String>();

    /** Stores location information for all the trees in this forest. */
    public final LocatorTable locatorTable = new LocatorTable();

    /** Stores all the outer-most &lt;jaxb:bindings&gt; customizations. */
    public final Set<Element> outerMostBindings = new HashSet<Element>();

    /** Used to resolve references to other schema documents. May be null. */
    private EntityResolver entityResolver = null;

    /** Errors encountered during the parsing will be sent to this object. May be null. */
    private ErrorReceiver errorReceiver = null;

    /** Schema language dependent part of the processing. */
    protected final InternalizationLogic logic;

    /** Supplies the SAX parsers whose {@link XMLReader} reads documents into this forest. */
    private final SAXParserFactory parserFactory;
    /** Creates the empty {@link Document} instances that parsed content is built into. */
    private final DocumentBuilder documentBuilder;

    /**
     * Options given at construction time. This is null when the forest was
     * created through the constructor that takes a parser factory and a
     * document builder.
     */
    private final Options options;
 130 
 131     public DOMForest(
 132         SAXParserFactory parserFactory, DocumentBuilder documentBuilder,
 133         InternalizationLogic logic ) {
 134 
 135         this.parserFactory = parserFactory;
 136         this.documentBuilder = documentBuilder;
 137         this.logic = logic;
 138         this.options = null;
 139     }
 140 
 141     public DOMForest( InternalizationLogic logic, Options opt ) {
 142 
 143         if (opt == null) throw new AssertionError("Options object null");
 144         this.options = opt;
 145 
 146         try {
 147             DocumentBuilderFactory dbf = XmlFactory.createDocumentBuilderFactory(opt.disableXmlSecurity);
 148             this.documentBuilder = dbf.newDocumentBuilder();
 149             this.parserFactory = XmlFactory.createParserFactory(opt.disableXmlSecurity);
 150         } catch( ParserConfigurationException e ) {
 151             throw new AssertionError(e);
 152         }
 153 
 154         this.logic = logic;
 155     }
 156 
 157     /**
 158      * Gets the DOM tree associated with the specified system ID,
 159      * or null if none is found.
 160      */
 161     public Document get( String systemId ) {
 162         Document doc = core.get(systemId);
 163 
 164         if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) {
 165             // As of JDK1.4, java.net.URL.toExternal method returns URLs like
 166             // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
 167             // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
 168             // and this descripancy breaks DOM look up by system ID.
 169 
 170             // this extra check solves this problem.
 171             doc = core.get( "file://"+systemId.substring(5) );
 172         }
 173 
 174         if( doc==null && systemId.startsWith("file:") ) {
 175             // on Windows, filenames are case insensitive.
 176             // perform case-insensitive search for improved user experience
 177             String systemPath = getPath(systemId);
 178             for (String key : core.keySet()) {
 179                 if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) {
 180                     doc = core.get(key);
 181                     break;
 182                 }
 183             }
 184         }
 185 
 186         return doc;
 187     }
 188 
 189     /**
 190      * Strips off the leading 'file:///' portion from an URL.
 191      */
 192     private String getPath(String key) {
 193         key = key.substring(5); // skip 'file:'
 194         while(key.length()>0 && key.charAt(0)=='/') {
 195             key = key.substring(1);
 196         }
 197         return key;
 198     }
 199 
 200     /**
 201      * Returns a read-only set of root document system IDs.
 202      */
 203     public Set<String> getRootDocuments() {
 204         return Collections.unmodifiableSet(rootDocuments);
 205     }
 206 
 207     /**
 208      * Picks one document at random and returns it.
 209      */
 210     public Document getOneDocument() {
 211         for (Document dom : core.values()) {
 212             if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
 213                 return dom;
 214         }
 215         // we should have caught this error very early on
 216         throw new AssertionError();
 217     }
 218 
 219     /**
 220      * Checks the correctness of the XML Schema documents and return true
 221      * if it's OK.
 222      *
 223      * <p>
 224      * This method performs a weaker version of the tests where error messages
 225      * are provided without line number information. So whenever possible
 226      * use {@link SchemaConstraintChecker}.
 227      *
 228      * @see SchemaConstraintChecker
 229      */
 230     public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) {
 231         try {
 232             boolean disableXmlSecurity = false;
 233             if (options != null) {
 234                 disableXmlSecurity = options.disableXmlSecurity;
 235             }
 236             SchemaFactory sf = XmlFactory.createSchemaFactory(W3C_XML_SCHEMA_NS_URI, disableXmlSecurity);
 237             ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler);
 238             sf.setErrorHandler(filter);
 239             Set<String> roots = getRootDocuments();
 240             Source[] sources = new Source[roots.size()];
 241             int i=0;
 242             for (String root : roots) {
 243                 sources[i++] = new DOMSource(get(root),root);
 244             }
 245             sf.newSchema(sources);
 246             return !filter.hadError();
 247         } catch (SAXException e) {
 248             // the errors should have been reported
 249             return false;
 250         }
 251     }
 252 
 253     /**
 254      * Gets the system ID from which the given DOM is parsed.
 255      * <p>
 256      * Poor-man's base URI.
 257      */
 258     public String getSystemId( Document dom ) {
 259         for (Map.Entry<String,Document> e : core.entrySet()) {
 260             if (e.getValue() == dom)
 261                 return e.getKey();
 262         }
 263         return null;
 264     }
 265 
 266     public Document parse( InputSource source, boolean root ) throws SAXException {
 267         if( source.getSystemId()==null )
 268             throw new IllegalArgumentException();
 269 
 270         return parse( source.getSystemId(), source, root );
 271     }
 272 
 273     /**
 274      * Parses an XML at the given location (
 275      * and XMLs referenced by it) into DOM trees
 276      * and stores them to this forest.
 277      *
 278      * @return the parsed DOM document object.
 279      */
 280     public Document parse( String systemId, boolean root ) throws SAXException, IOException {
 281 
 282         systemId = Options.normalizeSystemId(systemId);
 283 
 284         if( core.containsKey(systemId) )
 285             // this document has already been parsed. Just ignore.
 286             return core.get(systemId);
 287 
 288         InputSource is=null;
 289 
 290         // allow entity resolver to find the actual byte stream.
 291         if( entityResolver!=null )
 292             is = entityResolver.resolveEntity(null,systemId);
 293         if( is==null )
 294             is = new InputSource(systemId);
 295 
 296         // but we still use the original system Id as the key.
 297         return parse( systemId, is, root );
 298     }
 299 
 300     /**
 301      * Returns a {@link ContentHandler} to feed SAX events into.
 302      *
 303      * <p>
 304      * The client of this class can feed SAX events into the handler
 305      * to parse a document into this DOM forest.
 306      *
 307      * This version requires that the DOM object to be created and registered
 308      * to the map beforehand.
 309      */
 310     private ContentHandler getParserHandler( Document dom ) {
 311         ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings);
 312         handler = new WhitespaceStripper(handler,errorReceiver,entityResolver);
 313         handler = new VersionChecker(handler,errorReceiver,entityResolver);
 314 
 315         // insert the reference finder so that
 316         // included/imported schemas will be also parsed
 317         XMLFilterImpl f = logic.createExternalReferenceFinder(this);
 318         f.setContentHandler(handler);
 319 
 320         if(errorReceiver!=null)
 321             f.setErrorHandler(errorReceiver);
 322         if(entityResolver!=null)
 323             f.setEntityResolver(entityResolver);
 324 
 325         return f;
 326     }
 327 
 328     public interface Handler extends ContentHandler {
 329         /**
 330          * Gets the DOM that was built.
 331          */
 332         public Document getDocument();
 333     }
 334 
 335     private static abstract class HandlerImpl extends XMLFilterImpl implements Handler {
 336     }
 337 
 338     /**
 339      * Returns a {@link ContentHandler} to feed SAX events into.
 340      *
 341      * <p>
 342      * The client of this class can feed SAX events into the handler
 343      * to parse a document into this DOM forest.
 344      */
 345     public Handler getParserHandler( String systemId, boolean root ) {
 346         final Document dom = documentBuilder.newDocument();
 347         core.put( systemId, dom );
 348         if(root)
 349             rootDocuments.add(systemId);
 350 
 351         ContentHandler handler = getParserHandler(dom);
 352 
 353         // we will register the DOM to the map once the system ID becomes available.
 354         // but the SAX allows the event source to not to provide that information,
 355         // so be prepared for such case.
 356         HandlerImpl x = new HandlerImpl() {
 357             public Document getDocument() {
 358                 return dom;
 359             }
 360         };
 361         x.setContentHandler(handler);
 362 
 363         return x;
 364    }
 365 
 366     /**
 367      * Parses the given document and add it to the DOM forest.
 368      *
 369      * @return
 370      *      null if there was a parse error. otherwise non-null.
 371      */
 372     public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException {
 373         Document dom = documentBuilder.newDocument();
 374 
 375         systemId = Options.normalizeSystemId(systemId);
 376 
 377         // put into the map before growing a tree, to
 378         // prevent recursive reference from causing infinite loop.
 379         core.put( systemId, dom );
 380         if(root)
 381             rootDocuments.add(systemId);
 382 
 383         try {
 384             XMLReader reader = parserFactory.newSAXParser().getXMLReader();
 385             reader.setContentHandler(getParserHandler(dom));
 386             if(errorReceiver!=null)
 387                 reader.setErrorHandler(errorReceiver);
 388             if(entityResolver!=null)
 389                 reader.setEntityResolver(entityResolver);
 390             reader.parse(inputSource);
 391         } catch( ParserConfigurationException e ) {
 392             // in practice, this exception won't happen.
 393             errorReceiver.error(e.getMessage(),e);
 394             core.remove(systemId);
 395             rootDocuments.remove(systemId);
 396             return null;
 397         } catch( IOException e ) {
 398             errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e);
 399             core.remove(systemId);
 400             rootDocuments.remove(systemId);
 401             return null;
 402         }
 403 
 404         return dom;
 405     }
 406 
 407     public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException {
 408         Document dom = documentBuilder.newDocument();
 409 
 410         systemId = Options.normalizeSystemId(systemId);
 411 
 412         if(root)
 413             rootDocuments.add(systemId);
 414 
 415         if(systemId==null)
 416             throw new IllegalArgumentException("system id cannot be null");
 417         core.put( systemId, dom );
 418 
 419         new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge();
 420 
 421         return dom;
 422     }
 423 
 424     /**
 425      * Performs internalization.
 426      *
 427      * This method should be called only once, only after all the
 428      * schemas are parsed.
 429      *
 430      * @return
 431      *      the returned bindings need to be applied after schema
 432      *      components are built.
 433      */
 434     public SCDBasedBindingSet transform(boolean enableSCD) {
 435         return Internalizer.transform(this, enableSCD, options.disableXmlSecurity);
 436     }
 437 
 438     /**
 439      * Performs the schema correctness check by using JAXP 1.3.
 440      *
 441      * <p>
 442      * This is "weak", because {@link SchemaFactory#newSchema(Source[])}
 443      * doesn't handle inclusions very correctly (it ends up parsing it
 444      * from its original source, not in this tree), and because
 445      * it doesn't handle two documents for the same namespace very
 446      * well.
 447      *
 448      * <p>
 449      * We should eventually fix JAXP (and Xerces), but meanwhile
 450      * this weaker and potentially wrong correctness check is still
 451      * better than nothing when used inside JAX-WS (JAXB CLI and Ant
 452      * does a better job of checking this.)
 453      *
 454      * <p>
 455      * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}.
 456      */
 457     public void weakSchemaCorrectnessCheck(SchemaFactory sf) {
 458         List<SAXSource> sources = new ArrayList<SAXSource>();
 459         for( String systemId : getRootDocuments() ) {
 460             Document dom = get(systemId);
 461             if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
 462                 continue;   // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error
 463 
 464             SAXSource ss = createSAXSource(systemId);
 465             try {
 466                 ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true);
 467             } catch (SAXException e) {
 468                 throw new AssertionError(e);    // Xerces wants this. See 6395322.
 469             }
 470             sources.add(ss);
 471         }
 472 
 473         try {
 474             sf.newSchema(sources.toArray(new SAXSource[0]));
 475         } catch (SAXException e) {
 476             // error should have been reported.
 477         } catch (RuntimeException re) {
 478             // JAXP RI isn't very trustworthy when it comes to schema error check,
 479             // and we know some cases where it just dies with NPE. So handle it gracefully.
 480             // this masks a bug in the JAXP RI, but we need a release that we have to make.
 481             try {
 482                 sf.getErrorHandler().warning(
 483                     new SAXParseException(Messages.format(
 484                         Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,re.getMessage()),
 485                         null,null,-1,-1,re));
 486             } catch (SAXException e) {
 487                 // ignore
 488             }
 489         }
 490     }
 491 
 492     /**
 493      * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest}
 494      * (instead of parsing the original source identified by the system ID.)
 495      */
 496     public @NotNull SAXSource createSAXSource(String systemId) {
 497         ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() {
 498             // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect
 499             // handlers, since SAX allows handlers to be changed while parsing.
 500             @Override
 501             public void parse(InputSource input) throws SAXException, IOException {
 502                 createParser().parse(input, this, this, this);
 503             }
 504 
 505             @Override
 506             public void parse(String systemId) throws SAXException, IOException {
 507                 parse(new InputSource(systemId));
 508             }
 509         });
 510 
 511         return new SAXSource(reader,new InputSource(systemId));
 512     }
 513 
 514     /**
 515      * Creates {@link XMLParser} for XSOM which reads documents from
 516      * this DOMForest rather than doing a fresh parse.
 517      *
 518      * The net effect is that XSOM will read transformed XML Schemas
 519      * instead of the original documents.
 520      */
 521     public XMLParser createParser() {
 522         return new DOMForestParser(this, new JAXPParser(XmlFactory.createParserFactory(options.disableXmlSecurity)));
 523     }
 524 
 525     public EntityResolver getEntityResolver() {
 526         return entityResolver;
 527     }
 528 
 529     public void setEntityResolver(EntityResolver entityResolver) {
 530         this.entityResolver = entityResolver;
 531     }
 532 
 533     public ErrorReceiver getErrorHandler() {
 534         return errorReceiver;
 535     }
 536 
 537     public void setErrorHandler(ErrorReceiver errorHandler) {
 538         this.errorReceiver = errorHandler;
 539     }
 540 
 541     /**
 542      * Gets all the parsed documents.
 543      */
 544     public Document[] listDocuments() {
 545         return core.values().toArray(new Document[core.size()]);
 546     }
 547 
 548     /**
 549      * Gets all the system IDs of the documents.
 550      */
 551     public String[] listSystemIDs() {
 552         return core.keySet().toArray(new String[core.keySet().size()]);
 553     }
 554 
 555     /**
 556      * Dumps the contents of the forest to the specified stream.
 557      *
 558      * This is a debug method. As such, error handling is sloppy.
 559      */
 560     @SuppressWarnings("CallToThreadDumpStack")
 561     public void dump( OutputStream out ) throws IOException {
 562         try {
 563             // create identity transformer
 564             boolean disableXmlSecurity = false;
 565             if (options != null) {
 566                 disableXmlSecurity = options.disableXmlSecurity;
 567             }
 568             TransformerFactory tf = XmlFactory.createTransformerFactory(disableXmlSecurity);
 569             Transformer it = tf.newTransformer();
 570 
 571             for (Map.Entry<String, Document> e : core.entrySet()) {
 572                 out.write( ("---<< "+e.getKey()+'\n').getBytes() );
 573 
 574                 DataWriter dw = new DataWriter(new OutputStreamWriter(out),null);
 575                 dw.setIndentStep("  ");
 576                 it.transform( new DOMSource(e.getValue()),
 577                     new SAXResult(dw));
 578 
 579                 out.write( "\n\n\n".getBytes() );
 580             }
 581         } catch( TransformerException e ) {
 582             e.printStackTrace();
 583         }
 584     }
 585 }