1 /* 2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.tools.internal.xjc.reader.internalizer; 27 28 import java.io.IOException; 29 import java.io.OutputStream; 30 import java.io.OutputStreamWriter; 31 import java.util.ArrayList; 32 import java.util.Collections; 33 import java.util.HashMap; 34 import java.util.HashSet; 35 import java.util.List; 36 import java.util.Map; 37 import java.util.Set; 38 39 import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI; 40 import javax.xml.parsers.DocumentBuilder; 41 import javax.xml.parsers.DocumentBuilderFactory; 42 import javax.xml.parsers.ParserConfigurationException; 43 import javax.xml.parsers.SAXParserFactory; 44 import javax.xml.stream.XMLStreamException; 45 import javax.xml.stream.XMLStreamReader; 46 import javax.xml.transform.Source; 47 import javax.xml.transform.Transformer; 48 import javax.xml.transform.TransformerException; 49 import javax.xml.transform.TransformerFactory; 50 import javax.xml.transform.dom.DOMSource; 51 import javax.xml.transform.sax.SAXResult; 52 import javax.xml.transform.sax.SAXSource; 53 import javax.xml.validation.SchemaFactory; 54 55 import com.sun.istack.internal.NotNull; 56 import com.sun.istack.internal.XMLStreamReaderToContentHandler; 57 import com.sun.tools.internal.xjc.ErrorReceiver; 58 import com.sun.tools.internal.xjc.Options; 59 import com.sun.tools.internal.xjc.reader.Const; 60 import com.sun.tools.internal.xjc.reader.xmlschema.parser.SchemaConstraintChecker; 61 import com.sun.tools.internal.xjc.util.ErrorReceiverFilter; 62 import com.sun.xml.internal.bind.marshaller.DataWriter; 63 import com.sun.xml.internal.xsom.parser.JAXPParser; 64 import com.sun.xml.internal.xsom.parser.XMLParser; 65 66 import org.w3c.dom.Document; 67 import org.w3c.dom.Element; 68 import org.xml.sax.ContentHandler; 69 import org.xml.sax.EntityResolver; 70 import org.xml.sax.InputSource; 71 import org.xml.sax.SAXException; 72 import org.xml.sax.SAXParseException; 73 import org.xml.sax.XMLReader; 74 import org.xml.sax.helpers.XMLFilterImpl; 75 76 77 /** 78 * Builds a DOM forest and maintains association from 79 * system IDs to DOM trees. 80 * 81 * <p> 82 * A forest is a transitive reflexive closure of referenced documents. 83 * IOW, if a document is in a forest, all the documents referenced from 84 * it is in a forest, too. To support this semantics, {@link DOMForest} 85 * uses {@link InternalizationLogic} to find referenced documents. 86 * 87 * <p> 88 * Some documents are marked as "root"s, meaning those documents were 89 * put into a forest explicitly, not because it is referenced from another 90 * document. (However, a root document can be referenced from other 91 * documents, too.) 92 * 93 * @author 94 * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com) 95 */ 96 public final class DOMForest { 97 /** actual data storage map<SystemId,Document>. */ 98 private final Map<String,Document> core = new HashMap<String,Document>(); 99 100 /** 101 * To correctly feed documents to a schema parser, we need to remember 102 * which documents (of the forest) were given as the root 103 * documents, and which of them are read as included/imported 104 * documents. 105 * 106 * <p> 107 * Set of system ids as strings. 108 */ 109 private final Set<String> rootDocuments = new HashSet<String>(); 110 111 /** Stores location information for all the trees in this forest. */ 112 public final LocatorTable locatorTable = new LocatorTable(); 113 114 /** Stores all the outer-most <jaxb:bindings> customizations. */ 115 public final Set<Element> outerMostBindings = new HashSet<Element>(); 116 117 /** Used to resolve references to other schema documents. */ 118 private EntityResolver entityResolver = null; 119 120 /** Errors encountered during the parsing will be sent to this object. */ 121 private ErrorReceiver errorReceiver = null; 122 123 /** Schema language dependent part of the processing. */ 124 protected final InternalizationLogic logic; 125 126 private final SAXParserFactory parserFactory; 127 private final DocumentBuilder documentBuilder; 128 129 130 public DOMForest( 131 SAXParserFactory parserFactory, DocumentBuilder documentBuilder, 132 InternalizationLogic logic ) { 133 134 this.parserFactory = parserFactory; 135 this.documentBuilder = documentBuilder; 136 this.logic = logic; 137 } 138 139 public DOMForest( InternalizationLogic logic ) { 140 try { 141 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 142 dbf.setNamespaceAware(true); 143 this.documentBuilder = dbf.newDocumentBuilder(); 144 145 this.parserFactory = SAXParserFactory.newInstance(); 146 this.parserFactory.setNamespaceAware(true); 147 } catch( ParserConfigurationException e ) { 148 throw new AssertionError(e); 149 } 150 151 this.logic = logic; 152 } 153 154 /** 155 * Gets the DOM tree associated with the specified system ID, 156 * or null if none is found. 157 */ 158 public Document get( String systemId ) { 159 Document doc = core.get(systemId); 160 161 if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) { 162 // As of JDK1.4, java.net.URL.toExternal method returns URLs like 163 // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738. 164 // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"), 165 // and this descripancy breaks DOM look up by system ID. 166 167 // this extra check solves this problem. 168 doc = core.get( "file://"+systemId.substring(5) ); 169 } 170 171 if( doc==null && systemId.startsWith("file:") ) { 172 // on Windows, filenames are case insensitive. 173 // perform case-insensitive search for improved user experience 174 String systemPath = getPath(systemId); 175 for (String key : core.keySet()) { 176 if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) { 177 doc = core.get(key); 178 break; 179 } 180 } 181 } 182 183 return doc; 184 } 185 186 /** 187 * Strips off the leading 'file:///' portion from an URL. 188 */ 189 private String getPath(String key) { 190 key = key.substring(5); // skip 'file:' 191 while(key.length()>0 && key.charAt(0)=='/') 192 key = key.substring(1); 193 return key; 194 } 195 196 /** 197 * Returns a read-only set of root document system IDs. 198 */ 199 public Set<String> getRootDocuments() { 200 return Collections.unmodifiableSet(rootDocuments); 201 } 202 203 /** 204 * Picks one document at random and returns it. 205 */ 206 public Document getOneDocument() { 207 for (Document dom : core.values()) { 208 if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) 209 return dom; 210 } 211 // we should have caught this error very early on 212 throw new AssertionError(); 213 } 214 215 /** 216 * Checks the correctness of the XML Schema documents and return true 217 * if it's OK. 218 * 219 * <p> 220 * This method performs a weaker version of the tests where error messages 221 * are provided without line number information. So whenever possible 222 * use {@link SchemaConstraintChecker}. 223 * 224 * @see SchemaConstraintChecker 225 */ 226 public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) { 227 try { 228 SchemaFactory sf = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI); 229 ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler); 230 sf.setErrorHandler(filter); 231 Set<String> roots = getRootDocuments(); 232 Source[] sources = new Source[roots.size()]; 233 int i=0; 234 for (String root : roots) { 235 sources[i++] = new DOMSource(get(root),root); 236 } 237 sf.newSchema(sources); 238 return !filter.hadError(); 239 } catch (SAXException e) { 240 // the errors should have been reported 241 return false; 242 } 243 } 244 245 /** 246 * Gets the system ID from which the given DOM is parsed. 247 * <p> 248 * Poor-man's base URI. 249 */ 250 public String getSystemId( Document dom ) { 251 for (Map.Entry<String,Document> e : core.entrySet()) { 252 if (e.getValue() == dom) 253 return e.getKey(); 254 } 255 return null; 256 } 257 258 public Document parse( InputSource source, boolean root ) throws SAXException { 259 if( source.getSystemId()==null ) 260 throw new IllegalArgumentException(); 261 262 return parse( source.getSystemId(), source, root ); 263 } 264 265 /** 266 * Parses an XML at the given location ( 267 * and XMLs referenced by it) into DOM trees 268 * and stores them to this forest. 269 * 270 * @return the parsed DOM document object. 271 */ 272 public Document parse( String systemId, boolean root ) throws SAXException, IOException { 273 274 systemId = Options.normalizeSystemId(systemId); 275 276 if( core.containsKey(systemId) ) 277 // this document has already been parsed. Just ignore. 278 return core.get(systemId); 279 280 InputSource is=null; 281 282 // allow entity resolver to find the actual byte stream. 283 if( entityResolver!=null ) 284 is = entityResolver.resolveEntity(null,systemId); 285 if( is==null ) 286 is = new InputSource(systemId); 287 288 // but we still use the original system Id as the key. 289 return parse( systemId, is, root ); 290 } 291 292 /** 293 * Returns a {@link ContentHandler} to feed SAX events into. 294 * 295 * <p> 296 * The client of this class can feed SAX events into the handler 297 * to parse a document into this DOM forest. 298 * 299 * This version requires that the DOM object to be created and registered 300 * to the map beforehand. 301 */ 302 private ContentHandler getParserHandler( Document dom ) { 303 ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings); 304 handler = new WhitespaceStripper(handler,errorReceiver,entityResolver); 305 handler = new VersionChecker(handler,errorReceiver,entityResolver); 306 307 // insert the reference finder so that 308 // included/imported schemas will be also parsed 309 XMLFilterImpl f = logic.createExternalReferenceFinder(this); 310 f.setContentHandler(handler); 311 312 if(errorReceiver!=null) 313 f.setErrorHandler(errorReceiver); 314 if(entityResolver!=null) 315 f.setEntityResolver(entityResolver); 316 317 return f; 318 } 319 320 public interface Handler extends ContentHandler { 321 /** 322 * Gets the DOM that was built. 323 */ 324 public Document getDocument(); 325 } 326 327 private static abstract class HandlerImpl extends XMLFilterImpl implements Handler { 328 } 329 330 /** 331 * Returns a {@link ContentHandler} to feed SAX events into. 332 * 333 * <p> 334 * The client of this class can feed SAX events into the handler 335 * to parse a document into this DOM forest. 336 */ 337 public Handler getParserHandler( String systemId, boolean root ) { 338 final Document dom = documentBuilder.newDocument(); 339 core.put( systemId, dom ); 340 if(root) 341 rootDocuments.add(systemId); 342 343 ContentHandler handler = getParserHandler(dom); 344 345 // we will register the DOM to the map once the system ID becomes available. 346 // but the SAX allows the event source to not to provide that information, 347 // so be prepared for such case. 348 HandlerImpl x = new HandlerImpl() { 349 public Document getDocument() { 350 return dom; 351 } 352 }; 353 x.setContentHandler(handler); 354 355 return x; 356 } 357 358 /** 359 * Parses the given document and add it to the DOM forest. 360 * 361 * @return 362 * null if there was a parse error. otherwise non-null. 363 */ 364 public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException { 365 Document dom = documentBuilder.newDocument(); 366 367 systemId = Options.normalizeSystemId(systemId); 368 369 // put into the map before growing a tree, to 370 // prevent recursive reference from causing infinite loop. 371 core.put( systemId, dom ); 372 if(root) 373 rootDocuments.add(systemId); 374 375 try { 376 XMLReader reader = parserFactory.newSAXParser().getXMLReader(); 377 reader.setContentHandler(getParserHandler(dom)); 378 if(errorReceiver!=null) 379 reader.setErrorHandler(errorReceiver); 380 if(entityResolver!=null) 381 reader.setEntityResolver(entityResolver); 382 reader.parse(inputSource); 383 } catch( ParserConfigurationException e ) { 384 // in practice, this exception won't happen. 385 errorReceiver.error(e.getMessage(),e); 386 core.remove(systemId); 387 rootDocuments.remove(systemId); 388 return null; 389 } catch( IOException e ) { 390 errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e); 391 core.remove(systemId); 392 rootDocuments.remove(systemId); 393 return null; 394 } 395 396 return dom; 397 } 398 399 public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException { 400 Document dom = documentBuilder.newDocument(); 401 402 systemId = Options.normalizeSystemId(systemId); 403 404 if(root) 405 rootDocuments.add(systemId); 406 407 if(systemId==null) 408 throw new IllegalArgumentException("system id cannot be null"); 409 core.put( systemId, dom ); 410 411 new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge(); 412 413 return dom; 414 } 415 416 /** 417 * Performs internalization. 418 * 419 * This method should be called only once, only after all the 420 * schemas are parsed. 421 * 422 * @return 423 * the returned bindings need to be applied after schema 424 * components are built. 425 */ 426 public SCDBasedBindingSet transform(boolean enableSCD) { 427 return Internalizer.transform(this,enableSCD); 428 } 429 430 /** 431 * Performs the schema correctness check by using JAXP 1.3. 432 * 433 * <p> 434 * This is "weak", because {@link SchemaFactory#newSchema(Source[])} 435 * doesn't handle inclusions very correctly (it ends up parsing it 436 * from its original source, not in this tree), and because 437 * it doesn't handle two documents for the same namespace very 438 * well. 439 * 440 * <p> 441 * We should eventually fix JAXP (and Xerces), but meanwhile 442 * this weaker and potentially wrong correctness check is still 443 * better than nothing when used inside JAX-WS (JAXB CLI and Ant 444 * does a better job of checking this.) 445 * 446 * <p> 447 * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}. 448 */ 449 public void weakSchemaCorrectnessCheck(SchemaFactory sf) { 450 List<SAXSource> sources = new ArrayList<SAXSource>(); 451 for( String systemId : getRootDocuments() ) { 452 Document dom = get(systemId); 453 if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) 454 continue; // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error 455 456 SAXSource ss = createSAXSource(systemId); 457 try { 458 ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true); 459 } catch (SAXException e) { 460 throw new AssertionError(e); // Xerces wants this. See 6395322. 461 } 462 sources.add(ss); 463 } 464 465 try { 466 sf.newSchema(sources.toArray(new SAXSource[0])); 467 } catch (SAXException e) { 468 // error should have been reported. 469 } catch (RuntimeException e) { 470 // JAXP RI isn't very trustworthy when it comes to schema error check, 471 // and we know some cases where it just dies with NPE. So handle it gracefully. 472 // this masks a bug in the JAXP RI, but we need a release that we have to make. 473 try { 474 sf.getErrorHandler().warning( 475 new SAXParseException(Messages.format( 476 Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,e.getMessage()), 477 null,null,-1,-1,e)); 478 } catch (SAXException _) { 479 // ignore 480 } 481 } 482 } 483 484 /** 485 * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest} 486 * (instead of parsing the original source identified by the system ID.) 487 */ 488 public @NotNull SAXSource createSAXSource(String systemId) { 489 ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() { 490 // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect 491 // handlers, since SAX allows handlers to be changed while parsing. 492 public void parse(InputSource input) throws SAXException, IOException { 493 createParser().parse(input, this, this, this); 494 } 495 496 public void parse(String systemId) throws SAXException, IOException { 497 parse(new InputSource(systemId)); 498 } 499 }); 500 501 return new SAXSource(reader,new InputSource(systemId)); 502 } 503 504 /** 505 * Creates {@link XMLParser} for XSOM which reads documents from 506 * this DOMForest rather than doing a fresh parse. 507 * 508 * The net effect is that XSOM will read transformed XML Schemas 509 * instead of the original documents. 510 */ 511 public XMLParser createParser() { 512 return new DOMForestParser(this,new JAXPParser()); 513 } 514 515 516 517 public EntityResolver getEntityResolver() { 518 return entityResolver; 519 } 520 521 public void setEntityResolver(EntityResolver entityResolver) { 522 this.entityResolver = entityResolver; 523 } 524 525 public ErrorReceiver getErrorHandler() { 526 return errorReceiver; 527 } 528 529 public void setErrorHandler(ErrorReceiver errorHandler) { 530 this.errorReceiver = errorHandler; 531 } 532 533 /** 534 * Gets all the parsed documents. 535 */ 536 public Document[] listDocuments() { 537 return core.values().toArray(new Document[core.size()]); 538 } 539 540 /** 541 * Gets all the system IDs of the documents. 542 */ 543 public String[] listSystemIDs() { 544 return core.keySet().toArray(new String[core.keySet().size()]); 545 } 546 547 /** 548 * Dumps the contents of the forest to the specified stream. 549 * 550 * This is a debug method. As such, error handling is sloppy. 551 */ 552 public void dump( OutputStream out ) throws IOException { 553 try { 554 // create identity transformer 555 Transformer it = TransformerFactory.newInstance().newTransformer(); 556 557 for (Map.Entry<String, Document> e : core.entrySet()) { 558 out.write( ("---<< "+e.getKey()+'\n').getBytes() ); 559 560 DataWriter dw = new DataWriter(new OutputStreamWriter(out),null); 561 dw.setIndentStep(" "); 562 it.transform( new DOMSource(e.getValue()), 563 new SAXResult(dw)); 564 565 out.write( "\n\n\n".getBytes() ); 566 } 567 } catch( TransformerException e ) { 568 e.printStackTrace(); 569 } 570 } 571 }