1 /*
   2  * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.internal.xjc.reader.internalizer;
  27 
  28 import java.io.IOException;
  29 import java.io.OutputStream;
  30 import java.io.OutputStreamWriter;
  31 import java.util.ArrayList;
  32 import java.util.Collections;
  33 import java.util.HashMap;
  34 import java.util.HashSet;
  35 import java.util.List;
  36 import java.util.Map;
  37 import java.util.Set;
  38 
  39 import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
  40 import javax.xml.parsers.DocumentBuilder;
  41 import javax.xml.parsers.DocumentBuilderFactory;
  42 import javax.xml.parsers.ParserConfigurationException;
  43 import javax.xml.parsers.SAXParserFactory;
  44 import javax.xml.stream.XMLStreamException;
  45 import javax.xml.stream.XMLStreamReader;
  46 import javax.xml.transform.Source;
  47 import javax.xml.transform.Transformer;
  48 import javax.xml.transform.TransformerException;
  49 import javax.xml.transform.TransformerFactory;
  50 import javax.xml.transform.dom.DOMSource;
  51 import javax.xml.transform.sax.SAXResult;
  52 import javax.xml.transform.sax.SAXSource;
  53 import javax.xml.validation.SchemaFactory;
  54 
  55 import com.sun.istack.internal.NotNull;
  56 import com.sun.istack.internal.XMLStreamReaderToContentHandler;
  57 import com.sun.tools.internal.xjc.ErrorReceiver;
  58 import com.sun.tools.internal.xjc.Options;
  59 import com.sun.tools.internal.xjc.reader.Const;
  60 import com.sun.tools.internal.xjc.reader.xmlschema.parser.SchemaConstraintChecker;
  61 import com.sun.tools.internal.xjc.util.ErrorReceiverFilter;
  62 import com.sun.xml.internal.bind.marshaller.DataWriter;
  63 import com.sun.xml.internal.xsom.parser.JAXPParser;
  64 import com.sun.xml.internal.xsom.parser.XMLParser;
  65 
  66 import org.w3c.dom.Document;
  67 import org.w3c.dom.Element;
  68 import org.xml.sax.ContentHandler;
  69 import org.xml.sax.EntityResolver;
  70 import org.xml.sax.InputSource;
  71 import org.xml.sax.SAXException;
  72 import org.xml.sax.SAXParseException;
  73 import org.xml.sax.XMLReader;
  74 import org.xml.sax.helpers.XMLFilterImpl;
  75 
  76 
  77 /**
  78  * Builds a DOM forest and maintains association from
  79  * system IDs to DOM trees.
  80  *
  81  * <p>
 * A forest is the reflexive transitive closure of referenced documents.
 * In other words, if a document is in a forest, all the documents referenced
 * from it are in the forest, too. To support these semantics, {@link DOMForest}
 * uses {@link InternalizationLogic} to find referenced documents.
  86  *
  87  * <p>
 * Some documents are marked as "root"s, meaning those documents were
 * put into a forest explicitly, not because they are referenced from another
 * document. (However, a root document can be referenced from other
 * documents, too.)
  92  *
  93  * @author
  94  *     Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
  95  */
  96 public final class DOMForest {
  97     /** actual data storage map&lt;SystemId,Document>. */
  98     private final Map<String,Document> core = new HashMap<String,Document>();
  99 
 100     /**
 101      * To correctly feed documents to a schema parser, we need to remember
 102      * which documents (of the forest) were given as the root
 103      * documents, and which of them are read as included/imported
 104      * documents.
 105      *
 106      * <p>
 107      * Set of system ids as strings.
 108      */
 109     private final Set<String> rootDocuments = new HashSet<String>();
 110 
 111     /** Stores location information for all the trees in this forest. */
 112     public final LocatorTable locatorTable = new LocatorTable();
 113 
 114     /** Stores all the outer-most &lt;jaxb:bindings> customizations. */
 115     public final Set<Element> outerMostBindings = new HashSet<Element>();
 116 
 117     /** Used to resolve references to other schema documents. */
 118     private EntityResolver entityResolver = null;
 119 
 120     /** Errors encountered during the parsing will be sent to this object. */
 121     private ErrorReceiver errorReceiver = null;
 122 
 123     /** Schema language dependent part of the processing. */
 124     protected final InternalizationLogic logic;
 125 
 126     private final SAXParserFactory parserFactory;
 127     private final DocumentBuilder documentBuilder;
 128 
 129 
 130     public DOMForest(
 131         SAXParserFactory parserFactory, DocumentBuilder documentBuilder,
 132         InternalizationLogic logic ) {
 133 
 134         this.parserFactory = parserFactory;
 135         this.documentBuilder = documentBuilder;
 136         this.logic = logic;
 137     }
 138 
 139     public DOMForest( InternalizationLogic logic ) {
 140         try {
 141             DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
 142             dbf.setNamespaceAware(true);
 143             this.documentBuilder = dbf.newDocumentBuilder();
 144 
 145             this.parserFactory = SAXParserFactory.newInstance();
 146             this.parserFactory.setNamespaceAware(true);
 147         } catch( ParserConfigurationException e ) {
 148             throw new AssertionError(e);
 149         }
 150 
 151         this.logic = logic;
 152     }
 153 
 154     /**
 155      * Gets the DOM tree associated with the specified system ID,
 156      * or null if none is found.
 157      */
 158     public Document get( String systemId ) {
 159         Document doc = core.get(systemId);
 160 
 161         if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) {
 162             // As of JDK1.4, java.net.URL.toExternal method returns URLs like
 163             // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
 164             // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
 165             // and this descripancy breaks DOM look up by system ID.
 166 
 167             // this extra check solves this problem.
 168             doc = core.get( "file://"+systemId.substring(5) );
 169         }
 170 
 171         if( doc==null && systemId.startsWith("file:") ) {
 172             // on Windows, filenames are case insensitive.
 173             // perform case-insensitive search for improved user experience
 174             String systemPath = getPath(systemId);
 175             for (String key : core.keySet()) {
 176                 if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) {
 177                     doc = core.get(key);
 178                     break;
 179                 }
 180             }
 181         }
 182 
 183         return doc;
 184     }
 185 
 186     /**
 187      * Strips off the leading 'file:///' portion from an URL.
 188      */
 189     private String getPath(String key) {
 190         key = key.substring(5); // skip 'file:'
 191         while(key.length()>0 && key.charAt(0)=='/')
 192             key = key.substring(1);
 193         return key;
 194     }
 195 
 196     /**
 197      * Returns a read-only set of root document system IDs.
 198      */
 199     public Set<String> getRootDocuments() {
 200         return Collections.unmodifiableSet(rootDocuments);
 201     }
 202 
 203     /**
 204      * Picks one document at random and returns it.
 205      */
 206     public Document getOneDocument() {
 207         for (Document dom : core.values()) {
 208             if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
 209                 return dom;
 210         }
 211         // we should have caught this error very early on
 212         throw new AssertionError();
 213     }
 214 
 215     /**
 216      * Checks the correctness of the XML Schema documents and return true
 217      * if it's OK.
 218      *
 219      * <p>
 220      * This method performs a weaker version of the tests where error messages
 221      * are provided without line number information. So whenever possible
 222      * use {@link SchemaConstraintChecker}.
 223      *
 224      * @see SchemaConstraintChecker
 225      */
 226     public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) {
 227         try {
 228             SchemaFactory sf = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI);
 229             ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler);
 230             sf.setErrorHandler(filter);
 231             Set<String> roots = getRootDocuments();
 232             Source[] sources = new Source[roots.size()];
 233             int i=0;
 234             for (String root : roots) {
 235                 sources[i++] = new DOMSource(get(root),root);
 236             }
 237             sf.newSchema(sources);
 238             return !filter.hadError();
 239         } catch (SAXException e) {
 240             // the errors should have been reported
 241             return false;
 242         }
 243     }
 244 
 245     /**
 246      * Gets the system ID from which the given DOM is parsed.
 247      * <p>
 248      * Poor-man's base URI.
 249      */
 250     public String getSystemId( Document dom ) {
 251         for (Map.Entry<String,Document> e : core.entrySet()) {
 252             if (e.getValue() == dom)
 253                 return e.getKey();
 254         }
 255         return null;
 256     }
 257 
 258     public Document parse( InputSource source, boolean root ) throws SAXException {
 259         if( source.getSystemId()==null )
 260             throw new IllegalArgumentException();
 261 
 262         return parse( source.getSystemId(), source, root );
 263     }
 264 
 265     /**
 266      * Parses an XML at the given location (
 267      * and XMLs referenced by it) into DOM trees
 268      * and stores them to this forest.
 269      *
 270      * @return the parsed DOM document object.
 271      */
 272     public Document parse( String systemId, boolean root ) throws SAXException, IOException {
 273 
 274         systemId = Options.normalizeSystemId(systemId);
 275 
 276         if( core.containsKey(systemId) )
 277             // this document has already been parsed. Just ignore.
 278             return core.get(systemId);
 279 
 280         InputSource is=null;
 281 
 282         // allow entity resolver to find the actual byte stream.
 283         if( entityResolver!=null )
 284             is = entityResolver.resolveEntity(null,systemId);
 285         if( is==null )
 286             is = new InputSource(systemId);
 287 
 288         // but we still use the original system Id as the key.
 289         return parse( systemId, is, root );
 290     }
 291 
 292     /**
 293      * Returns a {@link ContentHandler} to feed SAX events into.
 294      *
 295      * <p>
 296      * The client of this class can feed SAX events into the handler
 297      * to parse a document into this DOM forest.
 298      *
 299      * This version requires that the DOM object to be created and registered
 300      * to the map beforehand.
 301      */
 302     private ContentHandler getParserHandler( Document dom ) {
 303         ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings);
 304         handler = new WhitespaceStripper(handler,errorReceiver,entityResolver);
 305         handler = new VersionChecker(handler,errorReceiver,entityResolver);
 306 
 307         // insert the reference finder so that
 308         // included/imported schemas will be also parsed
 309         XMLFilterImpl f = logic.createExternalReferenceFinder(this);
 310         f.setContentHandler(handler);
 311 
 312         if(errorReceiver!=null)
 313             f.setErrorHandler(errorReceiver);
 314         if(entityResolver!=null)
 315             f.setEntityResolver(entityResolver);
 316 
 317         return f;
 318     }
 319 
 320     public interface Handler extends ContentHandler {
 321         /**
 322          * Gets the DOM that was built.
 323          */
 324         public Document getDocument();
 325     }
 326 
 327     private static abstract class HandlerImpl extends XMLFilterImpl implements Handler {
 328     }
 329 
 330     /**
 331      * Returns a {@link ContentHandler} to feed SAX events into.
 332      *
 333      * <p>
 334      * The client of this class can feed SAX events into the handler
 335      * to parse a document into this DOM forest.
 336      */
 337     public Handler getParserHandler( String systemId, boolean root ) {
 338         final Document dom = documentBuilder.newDocument();
 339         core.put( systemId, dom );
 340         if(root)
 341             rootDocuments.add(systemId);
 342 
 343         ContentHandler handler = getParserHandler(dom);
 344 
 345         // we will register the DOM to the map once the system ID becomes available.
 346         // but the SAX allows the event source to not to provide that information,
 347         // so be prepared for such case.
 348         HandlerImpl x = new HandlerImpl() {
 349             public Document getDocument() {
 350                 return dom;
 351             }
 352         };
 353         x.setContentHandler(handler);
 354 
 355         return x;
 356    }
 357 
 358     /**
 359      * Parses the given document and add it to the DOM forest.
 360      *
 361      * @return
 362      *      null if there was a parse error. otherwise non-null.
 363      */
 364     public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException {
 365         Document dom = documentBuilder.newDocument();
 366 
 367         systemId = Options.normalizeSystemId(systemId);
 368 
 369         // put into the map before growing a tree, to
 370         // prevent recursive reference from causing infinite loop.
 371         core.put( systemId, dom );
 372         if(root)
 373             rootDocuments.add(systemId);
 374 
 375         try {
 376             XMLReader reader = parserFactory.newSAXParser().getXMLReader();
 377             reader.setContentHandler(getParserHandler(dom));
 378             if(errorReceiver!=null)
 379                 reader.setErrorHandler(errorReceiver);
 380             if(entityResolver!=null)
 381                 reader.setEntityResolver(entityResolver);
 382             reader.parse(inputSource);
 383         } catch( ParserConfigurationException e ) {
 384             // in practice, this exception won't happen.
 385             errorReceiver.error(e.getMessage(),e);
 386             core.remove(systemId);
 387             rootDocuments.remove(systemId);
 388             return null;
 389         } catch( IOException e ) {
 390             errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e);
 391             core.remove(systemId);
 392             rootDocuments.remove(systemId);
 393             return null;
 394         }
 395 
 396         return dom;
 397     }
 398 
 399     public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException {
 400         Document dom = documentBuilder.newDocument();
 401 
 402         systemId = Options.normalizeSystemId(systemId);
 403 
 404         if(root)
 405             rootDocuments.add(systemId);
 406 
 407         if(systemId==null)
 408             throw new IllegalArgumentException("system id cannot be null");
 409         core.put( systemId, dom );
 410 
 411         new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge();
 412 
 413         return dom;
 414     }
 415 
 416     /**
 417      * Performs internalization.
 418      *
 419      * This method should be called only once, only after all the
 420      * schemas are parsed.
 421      *
 422      * @return
 423      *      the returned bindings need to be applied after schema
 424      *      components are built.
 425      */
 426     public SCDBasedBindingSet transform(boolean enableSCD) {
 427         return Internalizer.transform(this,enableSCD);
 428     }
 429 
 430     /**
 431      * Performs the schema correctness check by using JAXP 1.3.
 432      *
 433      * <p>
 434      * This is "weak", because {@link SchemaFactory#newSchema(Source[])}
 435      * doesn't handle inclusions very correctly (it ends up parsing it
 436      * from its original source, not in this tree), and because
 437      * it doesn't handle two documents for the same namespace very
 438      * well.
 439      *
 440      * <p>
 441      * We should eventually fix JAXP (and Xerces), but meanwhile
 442      * this weaker and potentially wrong correctness check is still
 443      * better than nothing when used inside JAX-WS (JAXB CLI and Ant
 444      * does a better job of checking this.)
 445      *
 446      * <p>
 447      * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}.
 448      */
 449     public void weakSchemaCorrectnessCheck(SchemaFactory sf) {
 450         List<SAXSource> sources = new ArrayList<SAXSource>();
 451         for( String systemId : getRootDocuments() ) {
 452             Document dom = get(systemId);
 453             if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
 454                 continue;   // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error
 455 
 456             SAXSource ss = createSAXSource(systemId);
 457             try {
 458                 ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true);
 459             } catch (SAXException e) {
 460                 throw new AssertionError(e);    // Xerces wants this. See 6395322.
 461             }
 462             sources.add(ss);
 463         }
 464 
 465         try {
 466             sf.newSchema(sources.toArray(new SAXSource[0]));
 467         } catch (SAXException e) {
 468             // error should have been reported.
 469         } catch (RuntimeException e) {
 470             // JAXP RI isn't very trustworthy when it comes to schema error check,
 471             // and we know some cases where it just dies with NPE. So handle it gracefully.
 472             // this masks a bug in the JAXP RI, but we need a release that we have to make.
 473             try {
 474                 sf.getErrorHandler().warning(
 475                     new SAXParseException(Messages.format(
 476                         Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,e.getMessage()),
 477                         null,null,-1,-1,e));
 478             } catch (SAXException _) {
 479                 // ignore
 480             }
 481         }
 482     }
 483 
 484     /**
 485      * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest}
 486      * (instead of parsing the original source identified by the system ID.)
 487      */
 488     public @NotNull SAXSource createSAXSource(String systemId) {
 489         ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() {
 490             // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect
 491             // handlers, since SAX allows handlers to be changed while parsing.
 492             public void parse(InputSource input) throws SAXException, IOException {
 493                 createParser().parse(input, this, this, this);
 494             }
 495 
 496             public void parse(String systemId) throws SAXException, IOException {
 497                 parse(new InputSource(systemId));
 498             }
 499         });
 500 
 501         return new SAXSource(reader,new InputSource(systemId));
 502     }
 503 
 504     /**
 505      * Creates {@link XMLParser} for XSOM which reads documents from
 506      * this DOMForest rather than doing a fresh parse.
 507      *
 508      * The net effect is that XSOM will read transformed XML Schemas
 509      * instead of the original documents.
 510      */
 511     public XMLParser createParser() {
 512         return new DOMForestParser(this,new JAXPParser());
 513     }
 514 
 515 
 516 
 517     public EntityResolver getEntityResolver() {
 518         return entityResolver;
 519     }
 520 
 521     public void setEntityResolver(EntityResolver entityResolver) {
 522         this.entityResolver = entityResolver;
 523     }
 524 
 525     public ErrorReceiver getErrorHandler() {
 526         return errorReceiver;
 527     }
 528 
 529     public void setErrorHandler(ErrorReceiver errorHandler) {
 530         this.errorReceiver = errorHandler;
 531     }
 532 
 533     /**
 534      * Gets all the parsed documents.
 535      */
 536     public Document[] listDocuments() {
 537         return core.values().toArray(new Document[core.size()]);
 538     }
 539 
 540     /**
 541      * Gets all the system IDs of the documents.
 542      */
 543     public String[] listSystemIDs() {
 544         return core.keySet().toArray(new String[core.keySet().size()]);
 545     }
 546 
 547     /**
 548      * Dumps the contents of the forest to the specified stream.
 549      *
 550      * This is a debug method. As such, error handling is sloppy.
 551      */
 552     public void dump( OutputStream out ) throws IOException {
 553         try {
 554             // create identity transformer
 555             Transformer it = TransformerFactory.newInstance().newTransformer();
 556 
 557             for (Map.Entry<String, Document> e : core.entrySet()) {
 558                 out.write( ("---<< "+e.getKey()+'\n').getBytes() );
 559 
 560                 DataWriter dw = new DataWriter(new OutputStreamWriter(out),null);
 561                 dw.setIndentStep("  ");
 562                 it.transform( new DOMSource(e.getValue()),
 563                     new SAXResult(dw));
 564 
 565                 out.write( "\n\n\n".getBytes() );
 566             }
 567         } catch( TransformerException e ) {
 568             e.printStackTrace();
 569         }
 570     }
 571 }