1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xml.internal.dtm.ref.sax2dtm;
  22 
  23 import com.sun.org.apache.xml.internal.dtm.DTM;
  24 import com.sun.org.apache.xml.internal.dtm.DTMManager;
  25 import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
  26 import com.sun.org.apache.xml.internal.utils.IntStack;
  27 import com.sun.org.apache.xml.internal.utils.IntVector;
  28 import com.sun.org.apache.xml.internal.utils.StringVector;
  29 import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
  30 import java.util.Vector;
  31 import javax.xml.transform.Source;
  32 import org.xml.sax.SAXException;
  33 
  34 /**
  35  * This is a subclass of SAX2DTM which has been modified to meet the needs of
  36  * Result Tree Frameworks (RTFs). The differences are:
  37  *
  38  * 1) Multiple XML trees may be appended to the single DTM. This means
  39  * that the root node of each document is _not_ node 0. Some code has
  40  * had to be deoptimized to support this mode of operation, and an
  41  * explicit mechanism for obtaining the Node Handle of the root node
  42  * has been provided.
  43  *
  44  * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
  45  * most recently added trees off the end of the DTM as stylesheet elements
  46  * (and thus variable contexts) are exited.
  47  *
  48  * PLEASE NOTE that this class may be _heavily_ dependent upon the
  49  * internals of the SAX2DTM superclass, and must be maintained in
  50  * parallel with that code.  Arguably, they should be conditionals
  51  * within a single class... but they have been separated for
  52  * performance reasons. (In fact, one could even argue about which is
  53  * the superclass and which is the subclass; the current arrangement
  54  * is as much about preserving stability of existing code during
  55  * development as anything else.)
  56  *
  57  * %REVIEW% In fact, since the differences are so minor, I think it
  58  * may be possible/practical to fold them back into the base
  59  * SAX2DTM. Consider that as a future code-size optimization.
  60  *
  61  * @LastModified: Oct 2017
  62  */
  63 public class SAX2RTFDTM extends SAX2DTM
  64 {
  65   /** Set true to monitor SAX events and similar diagnostic info. */
  66   private static final boolean DEBUG = false;
  67 
  68   /** Most recently started Document, or null if the DTM is empty.  */
  69   private int m_currentDocumentNode=NULL;
  70 
  71   /** Tail-pruning mark: Number of nodes in use */
  72   IntStack mark_size=new IntStack();
  73   /** Tail-pruning mark: Number of data items in use */
  74   IntStack mark_data_size=new IntStack();
  75   /** Tail-pruning mark: Number of size-of-data fields in use */
  76   IntStack mark_char_size=new IntStack();
  77   /** Tail-pruning mark: Number of dataOrQName slots in use */
  78   IntStack mark_doq_size=new IntStack();
  79   /** Tail-pruning mark: Number of namespace declaration sets in use
  80    * %REVIEW% I don't think number of NS sets is ever different from number
  81    * of NS elements. We can probabably reduce these to a single stack and save
  82    * some storage.
  83    * */
  84   IntStack mark_nsdeclset_size=new IntStack();
  85   /** Tail-pruning mark: Number of naespace declaration elements in use
  86    * %REVIEW% I don't think number of NS sets is ever different from number
  87    * of NS elements. We can probabably reduce these to a single stack and save
  88    * some storage.
  89    */
  90   IntStack mark_nsdeclelem_size=new IntStack();
  91 
  92   /**
  93    * Tail-pruning mark:  initial number of nodes in use
  94    */
  95   int m_emptyNodeCount;
  96 
  97   /**
  98    * Tail-pruning mark:  initial number of namespace declaration sets
  99    */
 100   int m_emptyNSDeclSetCount;
 101 
 102   /**
 103    * Tail-pruning mark:  initial number of namespace declaration elements
 104    */
 105   int m_emptyNSDeclSetElemsCount;
 106 
 107   /**
 108    * Tail-pruning mark:  initial number of data items in use
 109    */
 110   int m_emptyDataCount;
 111 
 112   /**
 113    * Tail-pruning mark:  initial number of characters in use
 114    */
 115   int m_emptyCharsCount;
 116 
 117   /**
 118    * Tail-pruning mark:  default initial number of dataOrQName slots in use
 119    */
 120   int m_emptyDataQNCount;
 121 
 122   public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
 123                  DTMWSFilter whiteSpaceFilter,
 124                  XMLStringFactory xstringfactory,
 125                  boolean doIndexing)
 126   {
 127     super(mgr, source, dtmIdentity, whiteSpaceFilter,
 128           xstringfactory, doIndexing);
 129 
 130     // NEVER track source locators for RTFs; they aren't meaningful. I think.
 131     // (If we did track them, we'd need to tail-prune these too.)
 132     //com.sun.org.apache.xalan.internal.processor.TransformerFactoryImpl.m_source_location;
 133     m_useSourceLocationProperty=false;
 134     m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector()
 135                                                      : null;
 136     m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null;
 137     m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null;
 138 
 139     // Record initial sizes of fields that are pushed and restored
 140     // for RTF tail-pruning.  More entries can be popped than pushed, so
 141     // we need this to mark the primordial state of the DTM.
 142     m_emptyNodeCount = m_size;
 143     m_emptyNSDeclSetCount = (m_namespaceDeclSets == null)
 144                                  ? 0 : m_namespaceDeclSets.size();
 145     m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null)
 146                                       ? 0 : m_namespaceDeclSetElements.size();
 147     m_emptyDataCount = m_data.size();
 148     m_emptyCharsCount = m_chars.size();
 149     m_emptyDataQNCount = m_dataOrQName.size();
 150   }
 151 
 152   /**
 153    * Given a DTM, find the owning document node. In the case of
 154    * SAX2RTFDTM, which may contain multiple documents, this returns
 155    * the <b>most recently started</b> document, or null if the DTM is
 156    * empty or no document is currently under construction.
 157    *
 158    * %REVIEW% Should we continue to report the most recent after
 159    * construction has ended? I think not, given that it may have been
 160    * tail-pruned.
 161    *
 162    *  @return int Node handle of Document node, or null if this DTM does not
 163    *  contain an "active" document.
 164    * */
 165   public int getDocument()
 166   {
 167     return makeNodeHandle(m_currentDocumentNode);
 168   }
 169 
 170   /**
 171    * Given a node handle, find the owning document node, using DTM semantics
 172    * (Document owns itself) rather than DOM semantics (Document has no owner).
 173    *
 174    * (I'm counting on the fact that getOwnerDocument() is implemented on top
 175    * of this call, in the superclass, to avoid having to rewrite that one.
 176    * Be careful if that code changes!)
 177    *
 178    * @param nodeHandle the id of the node.
 179    * @return int Node handle of owning document
 180    */
 181   public int getDocumentRoot(int nodeHandle)
 182   {
 183     for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) {
 184       if (_type(id)==DTM.DOCUMENT_NODE) {
 185         return makeNodeHandle(id);
 186       }
 187     }
 188 
 189     return DTM.NULL; // Safety net; should never happen
 190   }
 191 
 192   /**
 193    * Given a node identifier, find the owning document node.  Unlike the DOM,
 194    * this considers the owningDocument of a Document to be itself. Note that
 195    * in shared DTMs this may not be zero.
 196    *
 197    * @param nodeIdentifier the id of the starting node.
 198    * @return int Node identifier of the root of this DTM tree
 199    */
 200   protected int _documentRoot(int nodeIdentifier)
 201   {
 202     if(nodeIdentifier==NULL) return NULL;
 203 
 204     for (int parent=_parent(nodeIdentifier);
 205          parent!=NULL;
 206          nodeIdentifier=parent,parent=_parent(nodeIdentifier))
 207       ;
 208 
 209     return nodeIdentifier;
 210   }
 211 
 212   /**
 213    * Receive notification of the beginning of a new RTF document.
 214    *
 215    * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
 216    * might want to consider folding the start/endDocument changes back
 217    * into the main SAX2DTM so we don't have to expose so many fields
 218    * (even as Protected) and carry the additional code.
 219    *
 220    * @throws SAXException Any SAX exception, possibly
 221    *            wrapping another exception.
 222    * @see org.xml.sax.ContentHandler#startDocument
 223    * */
 224   public void startDocument() throws SAXException
 225   {
 226     // Re-initialize the tree append process
 227     m_endDocumentOccured = false;
 228     m_prefixMappings = new Vector<>();
 229     m_contextIndexes = new IntStack();
 230     m_parents = new IntStack();
 231 
 232     m_currentDocumentNode=m_size;
 233     super.startDocument();
 234   }
 235 
 236   /**
 237    * Receive notification of the end of the document.
 238    *
 239    * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
 240    * might want to consider folding the start/endDocument changes back
 241    * into the main SAX2DTM so we don't have to expose so many fields
 242    * (even as Protected).
 243    *
 244    * @throws SAXException Any SAX exception, possibly
 245    *            wrapping another exception.
 246    * @see org.xml.sax.ContentHandler#endDocument
 247    * */
 248   public void endDocument() throws SAXException
 249   {
 250     charactersFlush();
 251 
 252     m_nextsib.setElementAt(NULL,m_currentDocumentNode);
 253 
 254     if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
 255       m_firstch.setElementAt(NULL,m_currentDocumentNode);
 256 
 257     if (DTM.NULL != m_previous)
 258       m_nextsib.setElementAt(DTM.NULL,m_previous);
 259 
 260     m_parents = null;
 261     m_prefixMappings = null;
 262     m_contextIndexes = null;
 263 
 264     m_currentDocumentNode= NULL; // no longer open
 265     m_endDocumentOccured = true;
 266   }
 267 
 268 
 269   /** "Tail-pruning" support for RTFs.
 270    *
 271    * This function pushes information about the current size of the
 272    * DTM's data structures onto a stack, for use by popRewindMark()
 273    * (which see).
 274    *
 275    * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
 276    * RTFs will not be indexed, so I can simply panic if that case
 277    * arises. Hey, it works...
 278    * */
 279   public void pushRewindMark()
 280   {
 281     if(m_indexing || m_elemIndexes!=null)
 282       throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
 283 
 284     // Values from DTMDefaultBase
 285     // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
 286     mark_size.push(m_size);
 287     mark_nsdeclset_size.push((m_namespaceDeclSets==null)
 288                                    ? 0
 289                                    : m_namespaceDeclSets.size());
 290     mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null)
 291                                    ? 0
 292                                    : m_namespaceDeclSetElements.size());
 293 
 294     // Values from SAX2DTM
 295     mark_data_size.push(m_data.size());
 296     mark_char_size.push(m_chars.size());
 297     mark_doq_size.push(m_dataOrQName.size());
 298   }
 299 
 300   /** "Tail-pruning" support for RTFs.
 301    *
 302    * This function pops the information previously saved by
 303    * pushRewindMark (which see) and uses it to discard all nodes added
 304    * to the DTM after that time. We expect that this will allow us to
 305    * reuse storage more effectively.
 306    *
 307    * This is _not_ intended to be called while a document is still being
 308    * constructed -- only between endDocument and the next startDocument
 309    *
 310    * %REVIEW% WARNING: This is the first use of some of the truncation
 311    * methods.  If Xalan blows up after this is called, that's a likely
 312    * place to check.
 313    *
 314    * %REVIEW% Our original design for DTMs permitted them to share
 315    * string pools.  If there any risk that this might be happening, we
 316    * can _not_ rewind and recover the string storage. One solution
 317    * might to assert that DTMs used for RTFs Must Not take advantage
 318    * of that feature, but this seems excessively fragile. Another, much
 319    * less attractive, would be to just let them leak... Nah.
 320    *
 321    * @return true if and only if the pop completely emptied the
 322    * RTF. That response is used when determining how to unspool
 323    * RTF-started-while-RTF-open situations.
 324    * */
 325   public boolean popRewindMark()
 326   {
 327     boolean top=mark_size.empty();
 328 
 329     m_size=top ? m_emptyNodeCount : mark_size.pop();
 330     m_exptype.setSize(m_size);
 331     m_firstch.setSize(m_size);
 332     m_nextsib.setSize(m_size);
 333     m_prevsib.setSize(m_size);
 334     m_parent.setSize(m_size);
 335 
 336     m_elemIndexes=null;
 337 
 338     int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop();
 339     if (m_namespaceDeclSets!=null) {
 340       m_namespaceDeclSets.setSize(ds);
 341     }
 342 
 343     int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop();
 344     if (m_namespaceDeclSetElements!=null) {
 345       m_namespaceDeclSetElements.setSize(ds1);
 346     }
 347 
 348     // Values from SAX2DTM - m_data always has a reserved entry
 349     m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
 350     m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop());
 351     m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop());
 352 
 353     // Return true iff DTM now empty
 354     return m_size==0;
 355   }
 356 
 357   /** @return true if a DTM tree is currently under construction.
 358    * */
 359   public boolean isTreeIncomplete()
 360   {
 361     return !m_endDocumentOccured;
 362   }
 363 }