1 /*
   2  * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *     http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xalan.internal.xsltc.dom;
  22 
  23 import com.sun.org.apache.xml.internal.dtm.DTM;
  24 import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBase;
  25 import com.sun.org.apache.xml.internal.dtm.DTMException;
  26 import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
  27 import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault;
  28 import com.sun.org.apache.xml.internal.res.XMLErrorResources;
  29 import com.sun.org.apache.xml.internal.res.XMLMessages;
  30 import com.sun.org.apache.xml.internal.utils.SystemIDResolver;
  31 import com.sun.org.apache.xalan.internal.xsltc.trax.DOM2SAX;
  32 import com.sun.org.apache.xalan.internal.xsltc.trax.StAXEvent2SAX;
  33 import com.sun.org.apache.xalan.internal.xsltc.trax.StAXStream2SAX;
  34 import javax.xml.stream.XMLEventReader;
  35 import javax.xml.stream.XMLStreamReader;
  36 import javax.xml.transform.Source;
  37 import javax.xml.transform.dom.DOMSource;
  38 import javax.xml.transform.sax.SAXSource;
  39 import javax.xml.transform.stream.StreamSource;
  40 import javax.xml.transform.stax.StAXSource;
  41 import jdk.xml.internal.JdkXmlUtils;
  42 import org.xml.sax.InputSource;
  43 import org.xml.sax.SAXNotRecognizedException;
  44 import org.xml.sax.SAXNotSupportedException;
  45 import org.xml.sax.XMLReader;
  46 
  47 /**
  48  * The XSLTC DTMManager implementation.
  49  */
  50 public class XSLTCDTMManager extends DTMManagerDefault {
  51 
  52     /**
  53      * Constructor XSLTCDTMManager
  54      *
  55      */
  56     public XSLTCDTMManager() {
  57         super();
  58     }
  59 
  60     /**
  61      * Obtain a new instance of a <code>DTMManager</code>.
  62      * This static method creates a new factory instance.
  63      * The current implementation just returns a new XSLTCDTMManager instance.
  64      */
  65     public static XSLTCDTMManager newInstance() {
  66         return new XSLTCDTMManager();
  67     }
  68 
  69     /**
  70      * Creates a new instance of the XSLTC DTM Manager service.
  71      * Creates a new instance of the default class
  72      * <code>com.sun.org.apache.xalan.internal.xsltc.dom.XSLTCDTMManager</code>.
  73      */
  74     public static XSLTCDTMManager createNewDTMManagerInstance() {
  75         return newInstance();
  76     }
  77 
  78     /**
  79      * Get an instance of a DTM, loaded with the content from the
  80      * specified source.  If the unique flag is true, a new instance will
  81      * always be returned.  Otherwise it is up to the DTMManager to return a
  82      * new instance or an instance that it already created and may be being used
  83      * by someone else.
  84      * (I think more parameters will need to be added for error handling, and
  85      * entity resolution).
  86      *
  87      * @param source the specification of the source object.
  88      * @param unique true if the returned DTM must be unique, probably because it
  89      * is going to be mutated.
  90      * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may
  91      *                         be null.
  92      * @param incremental true if the DTM should be built incrementally, if
  93      *                    possible.
  94      * @param doIndexing true if the caller considers it worth it to use
  95      *                   indexing schemes.
  96      *
  97      * @return a non-null DTM reference.
  98      */
  99     @Override
 100     public DTM getDTM(Source source, boolean unique,
 101                       DTMWSFilter whiteSpaceFilter, boolean incremental,
 102                       boolean doIndexing)
 103     {
 104         return getDTM(source, unique, whiteSpaceFilter, incremental,
 105                       doIndexing, false, 0, true, false);
 106     }
 107 
 108     /**
 109      * Get an instance of a DTM, loaded with the content from the
 110      * specified source.  If the unique flag is true, a new instance will
 111      * always be returned.  Otherwise it is up to the DTMManager to return a
 112      * new instance or an instance that it already created and may be being used
 113      * by someone else.
 114      * (I think more parameters will need to be added for error handling, and
 115      * entity resolution).
 116      *
 117      * @param source the specification of the source object.
 118      * @param unique true if the returned DTM must be unique, probably because it
 119      * is going to be mutated.
 120      * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may
 121      *                         be null.
 122      * @param incremental true if the DTM should be built incrementally, if
 123      *                    possible.
 124      * @param doIndexing true if the caller considers it worth it to use
 125      *                   indexing schemes.
 126      * @param buildIdIndex true if the id index table should be built.
 127      *
 128      * @return a non-null DTM reference.
 129      */
 130     public DTM getDTM(Source source, boolean unique,
 131                       DTMWSFilter whiteSpaceFilter, boolean incremental,
 132                       boolean doIndexing, boolean buildIdIndex)
 133     {
 134         return getDTM(source, unique, whiteSpaceFilter, incremental,
 135                       doIndexing, false, 0, buildIdIndex, false);
 136     }
 137 
 138     /**
 139      * Get an instance of a DTM, loaded with the content from the
 140      * specified source.  If the unique flag is true, a new instance will
 141      * always be returned.  Otherwise it is up to the DTMManager to return a
 142      * new instance or an instance that it already created and may be being used
 143      * by someone else.
 144      * (I think more parameters will need to be added for error handling, and
 145      * entity resolution).
 146      *
 147      * @param source the specification of the source object.
 148      * @param unique true if the returned DTM must be unique, probably because it
 149      * is going to be mutated.
 150      * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may
 151      *                         be null.
 152      * @param incremental true if the DTM should be built incrementally, if
 153      *                    possible.
 154      * @param doIndexing true if the caller considers it worth it to use
 155      *                   indexing schemes.
 156      * @param buildIdIndex true if the id index table should be built.
 157      * @param newNameTable true if we want to use a separate ExpandedNameTable
 158      *                     for this DTM.
 159      *
 160      * @return a non-null DTM reference.
 161      */
 162     public DTM getDTM(Source source, boolean unique,
 163                       DTMWSFilter whiteSpaceFilter, boolean incremental,
 164                       boolean doIndexing, boolean buildIdIndex,
 165                       boolean newNameTable)
 166     {
 167         return getDTM(source, unique, whiteSpaceFilter, incremental,
 168                       doIndexing, false, 0, buildIdIndex, newNameTable);
 169     }
 170 
 171     /**
 172      * Get an instance of a DTM, loaded with the content from the
 173      * specified source.  If the unique flag is true, a new instance will
 174      * always be returned.  Otherwise it is up to the DTMManager to return a
 175      * new instance or an instance that it already created and may be being used
 176      * by someone else.
 177      * (I think more parameters will need to be added for error handling, and
 178      * entity resolution).
 179      *
 180      * @param source the specification of the source object.
 181      * @param unique true if the returned DTM must be unique, probably because it
 182      * is going to be mutated.
 183      * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may
 184      *                         be null.
 185      * @param incremental true if the DTM should be built incrementally, if
 186      *                    possible.
 187      * @param doIndexing true if the caller considers it worth it to use
 188      *                   indexing schemes.
 189      * @param hasUserReader true if <code>source</code> is a
 190      *                      <code>SAXSource</code> object that has an
 191      *                      <code>XMLReader</code>, that was specified by the
 192      *                      user.
 193      * @param size  Specifies initial size of tables that represent the DTM
 194      * @param buildIdIndex true if the id index table should be built.
 195      *
 196      * @return a non-null DTM reference.
 197      */
 198     public DTM getDTM(Source source, boolean unique,
 199                       DTMWSFilter whiteSpaceFilter, boolean incremental,
 200                       boolean doIndexing, boolean hasUserReader, int size,
 201                       boolean buildIdIndex)
 202     {
 203         return getDTM(source, unique, whiteSpaceFilter, incremental,
 204                       doIndexing, hasUserReader, size,
 205                       buildIdIndex, false);
 206     }
 207 
 208     /**
 209      * Get an instance of a DTM, loaded with the content from the
 210      * specified source.  If the unique flag is true, a new instance will
 211      * always be returned.  Otherwise it is up to the DTMManager to return a
 212      * new instance or an instance that it already created and may be being used
 213      * by someone else.
 214      * (I think more parameters will need to be added for error handling, and
 215      * entity resolution).
 216      *
 217      * @param source the specification of the source object.
 218      * @param unique true if the returned DTM must be unique, probably because it
 219      * is going to be mutated.
 220      * @param whiteSpaceFilter Enables filtering of whitespace nodes, and may
 221      *                         be null.
 222      * @param incremental true if the DTM should be built incrementally, if
 223      *                    possible.
 224      * @param doIndexing true if the caller considers it worth it to use
 225      *                   indexing schemes.
 226      * @param hasUserReader true if <code>source</code> is a
 227      *                      <code>SAXSource</code> object that has an
 228      *                      <code>XMLReader</code>, that was specified by the
 229      *                      user.
 230      * @param size  Specifies initial size of tables that represent the DTM
 231      * @param buildIdIndex true if the id index table should be built.
 232      * @param newNameTable true if we want to use a separate ExpandedNameTable
 233      *                     for this DTM.
 234      *
 235      * @return a non-null DTM reference.
 236      */
 237     public DTM getDTM(Source source, boolean unique,
 238                       DTMWSFilter whiteSpaceFilter, boolean incremental,
 239                       boolean doIndexing, boolean hasUserReader, int size,
 240                       boolean buildIdIndex, boolean newNameTable)
 241     {
 242         /* Uncomment for debugging
 243         if (null != source) {
 244             System.out.println("Starting " +
 245                 (unique ? "UNIQUE" : "shared") +
 246                 " source: " + source.getSystemId());
 247         }
 248         */
 249 
 250         int dtmPos = getFirstFreeDTMID();
 251         int documentID = dtmPos << IDENT_DTM_NODE_BITS;
 252 
 253         if ((null != source) && source instanceof StAXSource) {
 254             final StAXSource staxSource = (StAXSource)source;
 255             StAXEvent2SAX staxevent2sax = null;
 256             StAXStream2SAX staxStream2SAX = null;
 257             if (staxSource.getXMLEventReader() != null) {
 258                 final XMLEventReader xmlEventReader = staxSource.getXMLEventReader();
 259                 staxevent2sax = new StAXEvent2SAX(xmlEventReader);
 260             } else if (staxSource.getXMLStreamReader() != null) {
 261                 final XMLStreamReader xmlStreamReader = staxSource.getXMLStreamReader();
 262                 staxStream2SAX = new StAXStream2SAX(xmlStreamReader);
 263             }
 264 
 265             SAXImpl dtm;
 266 
 267             if (size <= 0) {
 268                 dtm = new SAXImpl(this, source, documentID,
 269                                   whiteSpaceFilter, null, doIndexing,
 270                                   DTMDefaultBase.DEFAULT_BLOCKSIZE,
 271                                   buildIdIndex, newNameTable);
 272             } else {
 273                 dtm = new SAXImpl(this, source, documentID,
 274                                   whiteSpaceFilter, null, doIndexing,
 275                                   size, buildIdIndex, newNameTable);
 276             }
 277 
 278             dtm.setDocumentURI(source.getSystemId());
 279 
 280             addDTM(dtm, dtmPos, 0);
 281 
 282             try {
 283                 if (staxevent2sax != null) {
 284                     staxevent2sax.setContentHandler(dtm);
 285                     staxevent2sax.parse();
 286                 }
 287                 else if (staxStream2SAX != null) {
 288                     staxStream2SAX.setContentHandler(dtm);
 289                     staxStream2SAX.parse();
 290                 }
 291             } catch (RuntimeException re) {
 292                 throw re;
 293             } catch (Exception e) {
 294                 throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e);
 295             }
 296 
 297             return dtm;
 298         } else if ((null != source) && source instanceof DOMSource) {
 299             final DOMSource domsrc = (DOMSource) source;
 300             final org.w3c.dom.Node node = domsrc.getNode();
 301             final DOM2SAX dom2sax = new DOM2SAX(node);
 302 
 303             SAXImpl dtm;
 304 
 305             if (size <= 0) {
 306                 dtm = new SAXImpl(this, source, documentID,
 307                                   whiteSpaceFilter, null, doIndexing,
 308                                   DTMDefaultBase.DEFAULT_BLOCKSIZE,
 309                                   buildIdIndex, newNameTable);
 310             } else {
 311                 dtm = new SAXImpl(this, source, documentID,
 312                                   whiteSpaceFilter, null, doIndexing,
 313                                   size, buildIdIndex, newNameTable);
 314             }
 315 
 316             dtm.setDocumentURI(source.getSystemId());
 317 
 318             addDTM(dtm, dtmPos, 0);
 319 
 320             dom2sax.setContentHandler(dtm);
 321 
 322             try {
 323                 dom2sax.parse();
 324             } catch (RuntimeException re) {
 325                 throw re;
 326             } catch (Exception e) {
 327                 throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e);
 328             }
 329 
 330             return dtm;
 331         } else {
 332             boolean isSAXSource = (null != source)
 333                                   ? (source instanceof SAXSource) : true;
 334             boolean isStreamSource = (null != source)
 335                                   ? (source instanceof StreamSource) : false;
 336 
 337             if (isSAXSource || isStreamSource) {
 338                 XMLReader reader;
 339                 InputSource xmlSource;
 340 
 341                 if (null == source) {
 342                     xmlSource = null;
 343                     reader = null;
 344                     hasUserReader = false;  // Make sure the user didn't lie
 345                 }
 346                 else {
 347                     reader = getXMLReader(source);
 348                     xmlSource = SAXSource.sourceToInputSource(source);
 349 
 350                     String urlOfSource = xmlSource.getSystemId();
 351 
 352                     if (null != urlOfSource) {
 353                         try {
 354                             urlOfSource = SystemIDResolver.getAbsoluteURI(urlOfSource);
 355                         }
 356                         catch (Exception e) {
 357                             // %REVIEW% Is there a better way to send a warning?
 358                             System.err.println("Can not absolutize URL: " + urlOfSource);
 359                         }
 360 
 361                         xmlSource.setSystemId(urlOfSource);
 362                     }
 363                 }
 364 
 365                 // Create the basic SAX2DTM.
 366                 SAXImpl dtm;
 367                 if (size <= 0) {
 368                     dtm = new SAXImpl(this, source, documentID, whiteSpaceFilter,
 369                                       null, doIndexing,
 370                                       DTMDefaultBase.DEFAULT_BLOCKSIZE,
 371                                       buildIdIndex, newNameTable);
 372                 } else {
 373                     dtm = new SAXImpl(this, source, documentID, whiteSpaceFilter,
 374                             null, doIndexing, size, buildIdIndex, newNameTable);
 375                 }
 376 
 377                 // Go ahead and add the DTM to the lookup table.  This needs to be
 378                 // done before any parsing occurs. Note offset 0, since we've just
 379                 // created a new DTM.
 380                 addDTM(dtm, dtmPos, 0);
 381 
 382                 if (null == reader) {
 383                     // Then the user will construct it themselves.
 384                     return dtm;
 385                 }
 386 
 387                 reader.setContentHandler(dtm.getBuilder());
 388 
 389                 if (!hasUserReader || null == reader.getDTDHandler()) {
 390                     reader.setDTDHandler(dtm);
 391                 }
 392 
 393                 if(!hasUserReader || null == reader.getErrorHandler()) {
 394                     reader.setErrorHandler(dtm);
 395                 }
 396 
 397                 JdkXmlUtils.setXMLReaderPropertyIfSupport(reader,
 398                     "http://xml.org/sax/properties/lexical-handler", dtm, false);
 399 
 400                 try {
 401                     reader.parse(xmlSource);
 402                 }
 403                 catch (RuntimeException re) {
 404                     throw re;
 405                 }
 406                 catch (Exception e) {
 407                     throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(e);
 408                 } finally {
 409                     if (!hasUserReader) {
 410                         releaseXMLReader(reader);
 411                     }
 412                 }
 413 
 414                 /* Uncomment for debugging
 415                 System.out.println("Dumping SAX2DOM");
 416                 dtm.dumpDTM(System.err);
 417                 */
 418 
 419                 return dtm;
 420             }
 421             else {
 422                 // It should have been handled by a derived class or the caller
 423                 // made a mistake.
 424                 throw new DTMException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NOT_SUPPORTED, new Object[]{source}));
 425             }
 426         }
 427     }
 428 }