1 /*
   2  * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *     http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 package com.sun.org.apache.xalan.internal.xsltc.dom;
  21 
  22 import com.sun.org.apache.xml.internal.dtm.DTM;
  23 import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBase;
  24 import com.sun.org.apache.xml.internal.dtm.DTMException;
  25 import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
  26 import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault;
  27 import com.sun.org.apache.xml.internal.res.XMLErrorResources;
  28 import com.sun.org.apache.xml.internal.res.XMLMessages;
  29 import com.sun.org.apache.xml.internal.utils.SystemIDResolver;
  30 import com.sun.org.apache.xml.internal.utils.XMLReaderManager;
  31 import com.sun.org.apache.xalan.internal.xsltc.runtime.Constants;
  32 import com.sun.org.apache.xalan.internal.xsltc.trax.DOM2SAX;
  33 import com.sun.org.apache.xalan.internal.xsltc.trax.StAXEvent2SAX;
  34 import com.sun.org.apache.xalan.internal.xsltc.trax.StAXStream2SAX;
  35 import javax.xml.stream.XMLEventReader;
  36 import javax.xml.stream.XMLStreamReader;
  37 import javax.xml.transform.Source;
  38 import javax.xml.transform.dom.DOMSource;
  39 import javax.xml.transform.sax.SAXSource;
  40 import javax.xml.transform.stream.StreamSource;
  41 import javax.xml.transform.stax.StAXSource;
  42 import org.xml.sax.InputSource;
  43 import org.xml.sax.SAXException;
  44 import org.xml.sax.SAXNotRecognizedException;
  45 import org.xml.sax.SAXNotSupportedException;
  46 import org.xml.sax.XMLReader;
  47 
  48 /**
  49  * The XSLTC DTMManager implementation.
  50  */
  51 public class XSLTCDTMManager extends DTMManagerDefault {
  52 
  53     /**
  54      * To enable switching namespace awareness of SAX Reader for stream sources.
  55      */
  56     protected boolean m_namespaceAware = true;
  57 
  58     /**
  59      * Constructor XSLTCDTMManager
  60      *
  61      */
  62     public XSLTCDTMManager() {
  63         super();
  64     }
  65 
  66     /**
  67      * Obtain a new instance of a <code>DTMManager</code>.
  68      * This static method creates a new factory instance.
  69      * The current implementation just returns a new XSLTCDTMManager instance.
  70      */
  71     public static XSLTCDTMManager newInstance() {
  72         return new XSLTCDTMManager();
  73     }
  74 
  75     /**
  76      * Creates a new instance of the XSLTC DTM Manager service.
  77      * Creates a new instance of the default class
  78      * <code>com.sun.org.apache.xalan.internal.xsltc.dom.XSLTCDTMManager</code>.
  79      */
  80     public static XSLTCDTMManager createNewDTMManagerInstance() {
  81         return newInstance();
  82     }
  83 
  84     /**
  85      * Configure namespace awareness
  86      * This is used when creating the default SAX XMLReader.
  87      */
  88     public void setNamespaceAware(boolean namespaceAware) {
  89         m_namespaceAware = namespaceAware;
  90     }
  91 
  92     /**
  93      * This method returns the SAX2 parser to use with the InputSource
  94      * obtained from this URI.
  95      * It may return null if any SAX2-conformant XML parser can be used,
  96      * or if getInputSource() will also return null. The parser must
  97      * be free for use (i.e., not currently in use for another parse().
  98      * After use of the parser is completed, the releaseXMLReader(XMLReader)
  99      * must be called.
 100      *
 101      * @param inputSource The value returned from the URIResolver.
 102      * @return  a SAX2 XMLReader to use to resolve the inputSource argument.
 103      *
 104      * @return non-null XMLReader reference ready to parse.
 105      */
 106     @Override
 107     synchronized public XMLReader getXMLReader(Source inputSource) {
 108         try {
 109             XMLReader reader = (inputSource instanceof SAXSource) ?
 110                 ((SAXSource)inputSource).getXMLReader() : null;
 111 
 112             // if user did not supply a reader, ask for one from the reader manager
 113             if (null == reader) {
 114                 if (m_readerManager == null) {
 115                     m_readerManager = XMLReaderManager.getInstance(super.useServicesMechnism());
 116               }
 117 
 118               if (!m_namespaceAware)
 119                   m_readerManager.setFeature(Constants.NAMESPACE_FEATURE, false);
 120 
 121               reader = m_readerManager.getXMLReader();
 122             }
 123 
 124             return reader;
 125         } catch (SAXException se) {
 126             throw new DTMException(se.getMessage(), se);
 127         }
 128     }
 129 
 130     /**
 131      * Get an instance of a DTM, loaded with the content from the
 132      * specified source.  If the unique flag is true, a new instance will
 133      * always be returned.  Otherwise it is up to the DTMManager to return a
 134      * new instance or an instance that it already created and may be being used
 135      * by someone else.
 136      * (I think more parameters will need to be added for error handling, and
 137      * entity resolution).
 138      *
 139      * @param source the specification of the source object.
 140      * @param unique true if the returned DTM must be unique, probably because it
 141      * is going to be mutated.
 142      * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may
 143      *                         be null.
 144      * @param incremental true if the DTM should be built incrementally, if
 145      *                    possible.
 146      * @param doIndexing true if the caller considers it worth it to use
 147      *                   indexing schemes.
 148      *
 149      * @return a non-null DTM reference.
 150      */
 151     @Override
 152     public DTM getDTM(Source source, boolean unique,
 153                       DTMWSFilter whiteSpaceFilter, boolean incremental,
 154                       boolean doIndexing)
 155     {
 156         return getDTM(source, unique, whiteSpaceFilter, incremental,
 157                       doIndexing, false, 0, true, false);
 158     }
 159 
 160     /**
 161      * Get an instance of a DTM, loaded with the content from the
 162      * specified source.  If the unique flag is true, a new instance will
 163      * always be returned.  Otherwise it is up to the DTMManager to return a
 164      * new instance or an instance that it already created and may be being used
 165      * by someone else.
 166      * (I think more parameters will need to be added for error handling, and
 167      * entity resolution).
 168      *
 169      * @param source the specification of the source object.
 170      * @param unique true if the returned DTM must be unique, probably because it
 171      * is going to be mutated.
 172      * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may
 173      *                         be null.
 174      * @param incremental true if the DTM should be built incrementally, if
 175      *                    possible.
 176      * @param doIndexing true if the caller considers it worth it to use
 177      *                   indexing schemes.
 178      * @param buildIdIndex true if the id index table should be built.
 179      *
 180      * @return a non-null DTM reference.
 181      */
 182     public DTM getDTM(Source source, boolean unique,
 183                       DTMWSFilter whiteSpaceFilter, boolean incremental,
 184                       boolean doIndexing, boolean buildIdIndex)
 185     {
 186         return getDTM(source, unique, whiteSpaceFilter, incremental,
 187                       doIndexing, false, 0, buildIdIndex, false);
 188     }
 189 
 190     /**
 191      * Get an instance of a DTM, loaded with the content from the
 192      * specified source.  If the unique flag is true, a new instance will
 193      * always be returned.  Otherwise it is up to the DTMManager to return a
 194      * new instance or an instance that it already created and may be being used
 195      * by someone else.
 196      * (I think more parameters will need to be added for error handling, and
 197      * entity resolution).
 198      *
 199      * @param source the specification of the source object.
 200      * @param unique true if the returned DTM must be unique, probably because it
 201      * is going to be mutated.
 202      * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may
 203      *                         be null.
 204      * @param incremental true if the DTM should be built incrementally, if
 205      *                    possible.
 206      * @param doIndexing true if the caller considers it worth it to use
 207      *                   indexing schemes.
 208      * @param buildIdIndex true if the id index table should be built.
 209      * @param newNameTable true if we want to use a separate ExpandedNameTable
 210      *                     for this DTM.
 211      *
 212      * @return a non-null DTM reference.
 213      */
 214     public DTM getDTM(Source source, boolean unique,
 215                       DTMWSFilter whiteSpaceFilter, boolean incremental,
 216                       boolean doIndexing, boolean buildIdIndex,
 217                       boolean newNameTable)
 218     {
 219         return getDTM(source, unique, whiteSpaceFilter, incremental,
 220                       doIndexing, false, 0, buildIdIndex, newNameTable);
 221     }
 222 
 223     /**
 224      * Get an instance of a DTM, loaded with the content from the
 225      * specified source.  If the unique flag is true, a new instance will
 226      * always be returned.  Otherwise it is up to the DTMManager to return a
 227      * new instance or an instance that it already created and may be being used
 228      * by someone else.
 229      * (I think more parameters will need to be added for error handling, and
 230      * entity resolution).
 231      *
 232      * @param source the specification of the source object.
 233      * @param unique true if the returned DTM must be unique, probably because it
 234      * is going to be mutated.
 235      * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may
 236      *                         be null.
 237      * @param incremental true if the DTM should be built incrementally, if
 238      *                    possible.
 239      * @param doIndexing true if the caller considers it worth it to use
 240      *                   indexing schemes.
 241      * @param hasUserReader true if <code>source</code> is a
 242      *                      <code>SAXSource</code> object that has an
 243      *                      <code>XMLReader</code>, that was specified by the
 244      *                      user.
 245      * @param size  Specifies initial size of tables that represent the DTM
 246      * @param buildIdIndex true if the id index table should be built.
 247      *
 248      * @return a non-null DTM reference.
 249      */
 250     public DTM getDTM(Source source, boolean unique,
 251                       DTMWSFilter whiteSpaceFilter, boolean incremental,
 252                       boolean doIndexing, boolean hasUserReader, int size,
 253                       boolean buildIdIndex)
 254     {
 255         return getDTM(source, unique, whiteSpaceFilter, incremental,
 256                       doIndexing, hasUserReader, size,
 257                       buildIdIndex, false);
 258     }
 259 
 260     /**
 261      * Get an instance of a DTM, loaded with the content from the
 262      * specified source.  If the unique flag is true, a new instance will
 263      * always be returned.  Otherwise it is up to the DTMManager to return a
 264      * new instance or an instance that it already created and may be being used
 265      * by someone else.
 266      * (I think more parameters will need to be added for error handling, and
 267      * entity resolution).
 268      *
 269      * @param source the specification of the source object.
 270      * @param unique true if the returned DTM must be unique, probably because it
 271      * is going to be mutated.
 272      * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may
 273      *                         be null.
 274      * @param incremental true if the DTM should be built incrementally, if
 275      *                    possible.
 276      * @param doIndexing true if the caller considers it worth it to use
 277      *                   indexing schemes.
 278      * @param hasUserReader true if <code>source</code> is a
 279      *                      <code>SAXSource</code> object that has an
 280      *                      <code>XMLReader</code>, that was specified by the
 281      *                      user.
 282      * @param size  Specifies initial size of tables that represent the DTM
 283      * @param buildIdIndex true if the id index table should be built.
 284      * @param newNameTable true if we want to use a separate ExpandedNameTable
 285      *                     for this DTM.
 286      *
 287      * @return a non-null DTM reference.
 288      */
 289     public DTM getDTM(Source source, boolean unique,
 290                       DTMWSFilter whiteSpaceFilter, boolean incremental,
 291                       boolean doIndexing, boolean hasUserReader, int size,
 292                       boolean buildIdIndex, boolean newNameTable)
 293     {
 294         /* Uncomment for debugging
 295         if (null != source) {
 296             System.out.println("Starting " +
 297                 (unique ? "UNIQUE" : "shared") +
 298                 " source: " + source.getSystemId());
 299         }
 300         */
 301 
 302         int dtmPos = getFirstFreeDTMID();
 303         int documentID = dtmPos << IDENT_DTM_NODE_BITS;
 304 
 305         if ((null != source) && source instanceof StAXSource) {
 306             final StAXSource staxSource = (StAXSource)source;
 307             StAXEvent2SAX staxevent2sax = null;
 308             StAXStream2SAX staxStream2SAX = null;
 309             if (staxSource.getXMLEventReader() != null) {
 310                 final XMLEventReader xmlEventReader = staxSource.getXMLEventReader();
 311                 staxevent2sax = new StAXEvent2SAX(xmlEventReader);
 312             } else if (staxSource.getXMLStreamReader() != null) {
 313                 final XMLStreamReader xmlStreamReader = staxSource.getXMLStreamReader();
 314                 staxStream2SAX = new StAXStream2SAX(xmlStreamReader);
 315             }
 316 
 317             SAXImpl dtm;
 318 
 319             if (size <= 0) {
 320                 dtm = new SAXImpl(this, source, documentID,
 321                                   whiteSpaceFilter, null, doIndexing,
 322                                   DTMDefaultBase.DEFAULT_BLOCKSIZE,
 323                                   buildIdIndex, newNameTable);
 324             } else {
 325                 dtm = new SAXImpl(this, source, documentID,
 326                                   whiteSpaceFilter, null, doIndexing,
 327                                   size, buildIdIndex, newNameTable);
 328             }
 329 
 330             dtm.setDocumentURI(source.getSystemId());
 331 
 332             addDTM(dtm, dtmPos, 0);
 333 
 334             try {
 335                 if (staxevent2sax != null) {
 336                     staxevent2sax.setContentHandler(dtm);
 337                     staxevent2sax.parse();
 338                 }
 339                 else if (staxStream2SAX != null) {
 340                     staxStream2SAX.setContentHandler(dtm);
 341                     staxStream2SAX.parse();
 342                 }
 343             } catch (RuntimeException re) {
 344                 throw re;
 345             } catch (Exception e) {
 346                 throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e);
 347             }
 348 
 349             return dtm;
 350         } else if ((null != source) && source instanceof DOMSource) {
 351             final DOMSource domsrc = (DOMSource) source;
 352             final org.w3c.dom.Node node = domsrc.getNode();
 353             final DOM2SAX dom2sax = new DOM2SAX(node);
 354 
 355             SAXImpl dtm;
 356 
 357             if (size <= 0) {
 358                 dtm = new SAXImpl(this, source, documentID,
 359                                   whiteSpaceFilter, null, doIndexing,
 360                                   DTMDefaultBase.DEFAULT_BLOCKSIZE,
 361                                   buildIdIndex, newNameTable);
 362             } else {
 363                 dtm = new SAXImpl(this, source, documentID,
 364                                   whiteSpaceFilter, null, doIndexing,
 365                                   size, buildIdIndex, newNameTable);
 366             }
 367 
 368             dtm.setDocumentURI(source.getSystemId());
 369 
 370             addDTM(dtm, dtmPos, 0);
 371 
 372             dom2sax.setContentHandler(dtm);
 373 
 374             try {
 375                 dom2sax.parse();
 376             } catch (RuntimeException re) {
 377                 throw re;
 378             } catch (Exception e) {
 379                 throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e);
 380             }
 381 
 382             return dtm;
 383         } else {
 384             boolean isSAXSource = (null != source)
 385                                   ? (source instanceof SAXSource) : true;
 386             boolean isStreamSource = (null != source)
 387                                   ? (source instanceof StreamSource) : false;
 388 
 389             if (isSAXSource || isStreamSource) {
 390                 XMLReader reader;
 391                 InputSource xmlSource;
 392 
 393                 if (null == source) {
 394                     xmlSource = null;
 395                     reader = null;
 396                     hasUserReader = false;  // Make sure the user didn't lie
 397                 }
 398                 else {
 399                     reader = getXMLReader(source);
 400                     xmlSource = SAXSource.sourceToInputSource(source);
 401 
 402                     String urlOfSource = xmlSource.getSystemId();
 403 
 404                     if (null != urlOfSource) {
 405                         try {
 406                             urlOfSource = SystemIDResolver.getAbsoluteURI(urlOfSource);
 407                         }
 408                         catch (Exception e) {
 409                             // %REVIEW% Is there a better way to send a warning?
 410                             System.err.println("Can not absolutize URL: " + urlOfSource);
 411                         }
 412 
 413                         xmlSource.setSystemId(urlOfSource);
 414                     }
 415                 }
 416 
 417                 // Create the basic SAX2DTM.
 418                 SAXImpl dtm;
 419                 if (size <= 0) {
 420                     dtm = new SAXImpl(this, source, documentID, whiteSpaceFilter,
 421                                       null, doIndexing,
 422                                       DTMDefaultBase.DEFAULT_BLOCKSIZE,
 423                                       buildIdIndex, newNameTable);
 424                 } else {
 425                     dtm = new SAXImpl(this, source, documentID, whiteSpaceFilter,
 426                             null, doIndexing, size, buildIdIndex, newNameTable);
 427                 }
 428 
 429                 // Go ahead and add the DTM to the lookup table.  This needs to be
 430                 // done before any parsing occurs. Note offset 0, since we've just
 431                 // created a new DTM.
 432                 addDTM(dtm, dtmPos, 0);
 433 
 434                 if (null == reader) {
 435                     // Then the user will construct it themselves.
 436                     return dtm;
 437                 }
 438 
 439                 reader.setContentHandler(dtm.getBuilder());
 440 
 441                 if (!hasUserReader || null == reader.getDTDHandler()) {
 442                     reader.setDTDHandler(dtm);
 443                 }
 444 
 445                 if(!hasUserReader || null == reader.getErrorHandler()) {
 446                     reader.setErrorHandler(dtm);
 447                 }
 448 
 449                 try {
 450                     reader.setProperty("http://xml.org/sax/properties/lexical-handler", dtm);
 451                 }
 452                 catch (SAXNotRecognizedException e){}
 453                 catch (SAXNotSupportedException e){}
 454 
 455                 try {
 456                     reader.parse(xmlSource);
 457                 }
 458                 catch (RuntimeException re) {
 459                     throw re;
 460                 }
 461                 catch (Exception e) {
 462                     throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e);
 463                 } finally {
 464                     if (!hasUserReader) {
 465                         releaseXMLReader(reader);
 466                     }
 467                 }
 468 
 469                 /* Uncomment for debugging
 470                 System.out.println("Dumping SAX2DOM");
 471                 dtm.dumpDTM(System.err);
 472                 */
 473 
 474                 return dtm;
 475             }
 476             else {
 477                 // It should have been handled by a derived class or the caller
 478                 // made a mistake.
 479                 throw new DTMException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NOT_SUPPORTED, new Object[]{source}));
 480             }
 481         }
 482     }
 483 }