1 /* 2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.tools.internal.xjc.reader.internalizer; 27 28 import com.sun.istack.internal.NotNull; 29 import com.sun.istack.internal.XMLStreamReaderToContentHandler; 30 import com.sun.tools.internal.xjc.ErrorReceiver; 31 import com.sun.tools.internal.xjc.Options; 32 import com.sun.tools.internal.xjc.reader.Const; 33 import com.sun.tools.internal.xjc.util.ErrorReceiverFilter; 34 import com.sun.xml.internal.bind.marshaller.DataWriter; 35 import com.sun.xml.internal.bind.v2.util.XmlFactory; 36 import com.sun.xml.internal.xsom.parser.JAXPParser; 37 import com.sun.xml.internal.xsom.parser.XMLParser; 38 import org.w3c.dom.Document; 39 import org.w3c.dom.Element; 40 import org.xml.sax.*; 41 import org.xml.sax.helpers.XMLFilterImpl; 42 43 import javax.xml.parsers.DocumentBuilder; 44 import javax.xml.parsers.DocumentBuilderFactory; 45 import javax.xml.parsers.ParserConfigurationException; 46 import javax.xml.parsers.SAXParserFactory; 47 import javax.xml.stream.XMLStreamException; 48 import javax.xml.stream.XMLStreamReader; 49 import javax.xml.transform.Source; 50 import javax.xml.transform.Transformer; 51 import javax.xml.transform.TransformerException; 52 import javax.xml.transform.TransformerFactory; 53 import javax.xml.transform.dom.DOMSource; 54 import javax.xml.transform.sax.SAXResult; 55 import javax.xml.transform.sax.SAXSource; 56 import javax.xml.validation.SchemaFactory; 57 import java.io.IOException; 58 import java.io.OutputStream; 59 import java.io.OutputStreamWriter; 60 import java.util.*; 61 62 import static com.sun.xml.internal.bind.v2.util.XmlFactory.allowExternalAccess; 63 import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI; 64 65 66 /** 67 * Builds a DOM forest and maintains association from 68 * system IDs to DOM trees. 69 * 70 * <p> 71 * A forest is a transitive reflexive closure of referenced documents. 72 * IOW, if a document is in a forest, all the documents referenced from 73 * it is in a forest, too. To support this semantics, {@link DOMForest} 74 * uses {@link InternalizationLogic} to find referenced documents. 75 * 76 * <p> 77 * Some documents are marked as "root"s, meaning those documents were 78 * put into a forest explicitly, not because it is referenced from another 79 * document. (However, a root document can be referenced from other 80 * documents, too.) 81 * 82 * @author 83 * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com) 84 */ 85 public final class DOMForest { 86 /** actual data storage map<SystemId,Document>. */ 87 private final Map<String,Document> core = new HashMap<String,Document>(); 88 89 /** 90 * To correctly feed documents to a schema parser, we need to remember 91 * which documents (of the forest) were given as the root 92 * documents, and which of them are read as included/imported 93 * documents. 94 * 95 * <p> 96 * Set of system ids as strings. 97 */ 98 private final Set<String> rootDocuments = new LinkedHashSet<String>(); 99 100 /** Stores location information for all the trees in this forest. */ 101 public final LocatorTable locatorTable = new LocatorTable(); 102 103 /** Stores all the outer-most <jaxb:bindings> customizations. */ 104 public final Set<Element> outerMostBindings = new HashSet<Element>(); 105 106 /** Used to resolve references to other schema documents. */ 107 private EntityResolver entityResolver = null; 108 109 /** Errors encountered during the parsing will be sent to this object. */ 110 private ErrorReceiver errorReceiver = null; 111 112 /** Schema language dependent part of the processing. */ 113 protected final InternalizationLogic logic; 114 115 private final SAXParserFactory parserFactory; 116 private final DocumentBuilder documentBuilder; 117 118 private final Options options; 119 120 public DOMForest( 121 SAXParserFactory parserFactory, DocumentBuilder documentBuilder, 122 InternalizationLogic logic ) { 123 124 this.parserFactory = parserFactory; 125 this.documentBuilder = documentBuilder; 126 this.logic = logic; 127 this.options = null; 128 } 129 130 public DOMForest( InternalizationLogic logic, Options opt ) { 131 132 if (opt == null) throw new AssertionError("Options object null"); 133 this.options = opt; 134 135 try { 136 DocumentBuilderFactory dbf = XmlFactory.createDocumentBuilderFactory(opt.disableXmlSecurity); 137 this.documentBuilder = dbf.newDocumentBuilder(); 138 this.parserFactory = XmlFactory.createParserFactory(opt.disableXmlSecurity); 139 } catch( ParserConfigurationException e ) { 140 throw new AssertionError(e); 141 } 142 143 this.logic = logic; 144 } 145 146 /** 147 * Gets the DOM tree associated with the specified system ID, 148 * or null if none is found. 149 */ 150 public Document get( String systemId ) { 151 Document doc = core.get(systemId); 152 153 if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) { 154 // As of JDK1.4, java.net.URL.toExternal method returns URLs like 155 // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738. 156 // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"), 157 // and this descripancy breaks DOM look up by system ID. 158 159 // this extra check solves this problem. 160 doc = core.get( "file://"+systemId.substring(5) ); 161 } 162 163 if( doc==null && systemId.startsWith("file:") ) { 164 // on Windows, filenames are case insensitive. 165 // perform case-insensitive search for improved user experience 166 String systemPath = getPath(systemId); 167 for (String key : core.keySet()) { 168 if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) { 169 doc = core.get(key); 170 break; 171 } 172 } 173 } 174 175 return doc; 176 } 177 178 /** 179 * Strips off the leading 'file:///' portion from an URL. 180 */ 181 private String getPath(String key) { 182 key = key.substring(5); // skip 'file:' 183 while(key.length()>0 && key.charAt(0)=='/') { 184 key = key.substring(1); 185 } 186 return key; 187 } 188 189 /** 190 * Returns a read-only set of root document system IDs. 191 */ 192 public Set<String> getRootDocuments() { 193 return Collections.unmodifiableSet(rootDocuments); 194 } 195 196 /** 197 * Picks one document at random and returns it. 198 */ 199 public Document getOneDocument() { 200 for (Document dom : core.values()) { 201 if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) 202 return dom; 203 } 204 // we should have caught this error very early on 205 throw new AssertionError(); 206 } 207 208 /** 209 * Checks the correctness of the XML Schema documents and return true 210 * if it's OK. 211 * 212 * <p> 213 * This method performs a weaker version of the tests where error messages 214 * are provided without line number information. So whenever possible 215 * use {@link SchemaConstraintChecker}. 216 * 217 * @see SchemaConstraintChecker 218 */ 219 public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) { 220 try { 221 boolean disableXmlSecurity = false; 222 if (options != null) { 223 disableXmlSecurity = options.disableXmlSecurity; 224 } 225 SchemaFactory sf = XmlFactory.createSchemaFactory(W3C_XML_SCHEMA_NS_URI, disableXmlSecurity); 226 ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler); 227 sf.setErrorHandler(filter); 228 Set<String> roots = getRootDocuments(); 229 Source[] sources = new Source[roots.size()]; 230 int i=0; 231 for (String root : roots) { 232 sources[i++] = new DOMSource(get(root),root); 233 } 234 sf.newSchema(sources); 235 return !filter.hadError(); 236 } catch (SAXException e) { 237 // the errors should have been reported 238 return false; 239 } 240 } 241 242 /** 243 * Gets the system ID from which the given DOM is parsed. 244 * <p> 245 * Poor-man's base URI. 246 */ 247 public String getSystemId( Document dom ) { 248 for (Map.Entry<String,Document> e : core.entrySet()) { 249 if (e.getValue() == dom) 250 return e.getKey(); 251 } 252 return null; 253 } 254 255 public Document parse( InputSource source, boolean root ) throws SAXException { 256 if( source.getSystemId()==null ) 257 throw new IllegalArgumentException(); 258 259 return parse( source.getSystemId(), source, root ); 260 } 261 262 /** 263 * Parses an XML at the given location ( 264 * and XMLs referenced by it) into DOM trees 265 * and stores them to this forest. 266 * 267 * @return the parsed DOM document object. 268 */ 269 public Document parse( String systemId, boolean root ) throws SAXException, IOException { 270 271 systemId = Options.normalizeSystemId(systemId); 272 273 if( core.containsKey(systemId) ) 274 // this document has already been parsed. Just ignore. 275 return core.get(systemId); 276 277 InputSource is=null; 278 279 // allow entity resolver to find the actual byte stream. 280 if( entityResolver!=null ) 281 is = entityResolver.resolveEntity(null,systemId); 282 if( is==null ) 283 is = new InputSource(systemId); 284 285 // but we still use the original system Id as the key. 286 return parse( systemId, is, root ); 287 } 288 289 /** 290 * Returns a {@link ContentHandler} to feed SAX events into. 291 * 292 * <p> 293 * The client of this class can feed SAX events into the handler 294 * to parse a document into this DOM forest. 295 * 296 * This version requires that the DOM object to be created and registered 297 * to the map beforehand. 298 */ 299 private ContentHandler getParserHandler( Document dom ) { 300 ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings); 301 handler = new WhitespaceStripper(handler,errorReceiver,entityResolver); 302 handler = new VersionChecker(handler,errorReceiver,entityResolver); 303 304 // insert the reference finder so that 305 // included/imported schemas will be also parsed 306 XMLFilterImpl f = logic.createExternalReferenceFinder(this); 307 f.setContentHandler(handler); 308 309 if(errorReceiver!=null) 310 f.setErrorHandler(errorReceiver); 311 if(entityResolver!=null) 312 f.setEntityResolver(entityResolver); 313 314 return f; 315 } 316 317 public interface Handler extends ContentHandler { 318 /** 319 * Gets the DOM that was built. 320 */ 321 public Document getDocument(); 322 } 323 324 private static abstract class HandlerImpl extends XMLFilterImpl implements Handler { 325 } 326 327 /** 328 * Returns a {@link ContentHandler} to feed SAX events into. 329 * 330 * <p> 331 * The client of this class can feed SAX events into the handler 332 * to parse a document into this DOM forest. 333 */ 334 public Handler getParserHandler( String systemId, boolean root ) { 335 final Document dom = documentBuilder.newDocument(); 336 core.put( systemId, dom ); 337 if(root) 338 rootDocuments.add(systemId); 339 340 ContentHandler handler = getParserHandler(dom); 341 342 // we will register the DOM to the map once the system ID becomes available. 343 // but the SAX allows the event source to not to provide that information, 344 // so be prepared for such case. 345 HandlerImpl x = new HandlerImpl() { 346 public Document getDocument() { 347 return dom; 348 } 349 }; 350 x.setContentHandler(handler); 351 352 return x; 353 } 354 355 /** 356 * Parses the given document and add it to the DOM forest. 357 * 358 * @return 359 * null if there was a parse error. otherwise non-null. 360 */ 361 public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException { 362 Document dom = documentBuilder.newDocument(); 363 364 systemId = Options.normalizeSystemId(systemId); 365 366 // put into the map before growing a tree, to 367 // prevent recursive reference from causing infinite loop. 368 core.put( systemId, dom ); 369 if(root) 370 rootDocuments.add(systemId); 371 372 try { 373 XMLReader reader = parserFactory.newSAXParser().getXMLReader(); 374 reader.setContentHandler(getParserHandler(dom)); 375 if(errorReceiver!=null) 376 reader.setErrorHandler(errorReceiver); 377 if(entityResolver!=null) 378 reader.setEntityResolver(entityResolver); 379 reader.parse(inputSource); 380 } catch( ParserConfigurationException e ) { 381 // in practice, this exception won't happen. 382 errorReceiver.error(e.getMessage(),e); 383 core.remove(systemId); 384 rootDocuments.remove(systemId); 385 return null; 386 } catch( IOException e ) { 387 errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e); 388 core.remove(systemId); 389 rootDocuments.remove(systemId); 390 return null; 391 } 392 393 return dom; 394 } 395 396 public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException { 397 Document dom = documentBuilder.newDocument(); 398 399 systemId = Options.normalizeSystemId(systemId); 400 401 if(root) 402 rootDocuments.add(systemId); 403 404 if(systemId==null) 405 throw new IllegalArgumentException("system id cannot be null"); 406 core.put( systemId, dom ); 407 408 new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge(); 409 410 return dom; 411 } 412 413 /** 414 * Performs internalization. 415 * 416 * This method should be called only once, only after all the 417 * schemas are parsed. 418 * 419 * @return 420 * the returned bindings need to be applied after schema 421 * components are built. 422 */ 423 public SCDBasedBindingSet transform(boolean enableSCD) { 424 return Internalizer.transform(this, enableSCD, options.disableXmlSecurity); 425 } 426 427 /** 428 * Performs the schema correctness check by using JAXP 1.3. 429 * 430 * <p> 431 * This is "weak", because {@link SchemaFactory#newSchema(Source[])} 432 * doesn't handle inclusions very correctly (it ends up parsing it 433 * from its original source, not in this tree), and because 434 * it doesn't handle two documents for the same namespace very 435 * well. 436 * 437 * <p> 438 * We should eventually fix JAXP (and Xerces), but meanwhile 439 * this weaker and potentially wrong correctness check is still 440 * better than nothing when used inside JAX-WS (JAXB CLI and Ant 441 * does a better job of checking this.) 442 * 443 * <p> 444 * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}. 445 */ 446 public void weakSchemaCorrectnessCheck(SchemaFactory sf) { 447 List<SAXSource> sources = new ArrayList<SAXSource>(); 448 for( String systemId : getRootDocuments() ) { 449 Document dom = get(systemId); 450 if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) 451 continue; // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error 452 453 SAXSource ss = createSAXSource(systemId); 454 try { 455 ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true); 456 } catch (SAXException e) { 457 throw new AssertionError(e); // Xerces wants this. See 6395322. 458 } 459 sources.add(ss); 460 } 461 462 try { 463 allowExternalAccess(sf, "file,http", options.disableXmlSecurity).newSchema(sources.toArray(new SAXSource[0])); 464 } catch (SAXException e) { 465 // error should have been reported. 466 } catch (RuntimeException re) { 467 // JAXP RI isn't very trustworthy when it comes to schema error check, 468 // and we know some cases where it just dies with NPE. So handle it gracefully. 469 // this masks a bug in the JAXP RI, but we need a release that we have to make. 470 try { 471 sf.getErrorHandler().warning( 472 new SAXParseException(Messages.format( 473 Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,re.getMessage()), 474 null,null,-1,-1,re)); 475 } catch (SAXException e) { 476 // ignore 477 } 478 } 479 } 480 481 /** 482 * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest} 483 * (instead of parsing the original source identified by the system ID.) 484 */ 485 public @NotNull SAXSource createSAXSource(String systemId) { 486 ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() { 487 // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect 488 // handlers, since SAX allows handlers to be changed while parsing. 489 @Override 490 public void parse(InputSource input) throws SAXException, IOException { 491 createParser().parse(input, this, this, this); 492 } 493 494 @Override 495 public void parse(String systemId) throws SAXException, IOException { 496 parse(new InputSource(systemId)); 497 } 498 }); 499 500 return new SAXSource(reader,new InputSource(systemId)); 501 } 502 503 /** 504 * Creates {@link XMLParser} for XSOM which reads documents from 505 * this DOMForest rather than doing a fresh parse. 506 * 507 * The net effect is that XSOM will read transformed XML Schemas 508 * instead of the original documents. 509 */ 510 public XMLParser createParser() { 511 return new DOMForestParser(this, new JAXPParser(XmlFactory.createParserFactory(options.disableXmlSecurity))); 512 } 513 514 public EntityResolver getEntityResolver() { 515 return entityResolver; 516 } 517 518 public void setEntityResolver(EntityResolver entityResolver) { 519 this.entityResolver = entityResolver; 520 } 521 522 public ErrorReceiver getErrorHandler() { 523 return errorReceiver; 524 } 525 526 public void setErrorHandler(ErrorReceiver errorHandler) { 527 this.errorReceiver = errorHandler; 528 } 529 530 /** 531 * Gets all the parsed documents. 532 */ 533 public Document[] listDocuments() { 534 return core.values().toArray(new Document[core.size()]); 535 } 536 537 /** 538 * Gets all the system IDs of the documents. 539 */ 540 public String[] listSystemIDs() { 541 return core.keySet().toArray(new String[core.keySet().size()]); 542 } 543 544 /** 545 * Dumps the contents of the forest to the specified stream. 546 * 547 * This is a debug method. As such, error handling is sloppy. 548 */ 549 @SuppressWarnings("CallToThreadDumpStack") 550 public void dump( OutputStream out ) throws IOException { 551 try { 552 // create identity transformer 553 boolean disableXmlSecurity = false; 554 if (options != null) { 555 disableXmlSecurity = options.disableXmlSecurity; 556 } 557 TransformerFactory tf = XmlFactory.createTransformerFactory(disableXmlSecurity); 558 Transformer it = tf.newTransformer(); 559 560 for (Map.Entry<String, Document> e : core.entrySet()) { 561 out.write( ("---<< "+e.getKey()+'\n').getBytes() ); 562 563 DataWriter dw = new DataWriter(new OutputStreamWriter(out),null); 564 dw.setIndentStep(" "); 565 it.transform( new DOMSource(e.getValue()), 566 new SAXResult(dw)); 567 568 out.write( "\n\n\n".getBytes() ); 569 } 570 } catch( TransformerException e ) { 571 e.printStackTrace(); 572 } 573 } 574 }