1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * @LastModified: Oct 2017
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 package com.sun.org.apache.xml.internal.dtm.ref.sax2dtm;
  23 
  24 import com.sun.org.apache.xml.internal.dtm.DTM;
  25 import com.sun.org.apache.xml.internal.dtm.DTMManager;
  26 import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
  27 import com.sun.org.apache.xml.internal.utils.IntStack;
  28 import com.sun.org.apache.xml.internal.utils.IntVector;
  29 import com.sun.org.apache.xml.internal.utils.StringVector;
  30 import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
  31 import java.util.Vector;
  32 import javax.xml.transform.Source;
  33 import org.xml.sax.SAXException;
  34 
  35 /**
  36  * This is a subclass of SAX2DTM which has been modified to meet the needs of
  37  * Result Tree Frameworks (RTFs). The differences are:
  38  *
  39  * 1) Multiple XML trees may be appended to the single DTM. This means
  40  * that the root node of each document is _not_ node 0. Some code has
  41  * had to be deoptimized to support this mode of operation, and an
  42  * explicit mechanism for obtaining the Node Handle of the root node
  43  * has been provided.
  44  *
  45  * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
  46  * most recently added trees off the end of the DTM as stylesheet elements
  47  * (and thus variable contexts) are exited.
  48  *
  49  * PLEASE NOTE that this class may be _heavily_ dependent upon the
  50  * internals of the SAX2DTM superclass, and must be maintained in
  51  * parallel with that code.  Arguably, they should be conditionals
  52  * within a single class... but they have deen separated for
  53  * performance reasons. (In fact, one could even argue about which is
  54  * the superclass and which is the subclass; the current arrangement
  55  * is as much about preserving stability of existing code during
  56  * development as anything else.)
  57  *
  58  * %REVIEW% In fact, since the differences are so minor, I think it
  59  * may be possible/practical to fold them back into the base
  60  * SAX2DTM. Consider that as a future code-size optimization.
  61  * */
  62 public class SAX2RTFDTM extends SAX2DTM
  63 {
  64   /** Set true to monitor SAX events and similar diagnostic info. */
  65   private static final boolean DEBUG = false;
  66 
  67   /** Most recently started Document, or null if the DTM is empty.  */
  68   private int m_currentDocumentNode=NULL;
  69 
  70   /** Tail-pruning mark: Number of nodes in use */
  71   IntStack mark_size=new IntStack();
  72   /** Tail-pruning mark: Number of data items in use */
  73   IntStack mark_data_size=new IntStack();
  74   /** Tail-pruning mark: Number of size-of-data fields in use */
  75   IntStack mark_char_size=new IntStack();
  76   /** Tail-pruning mark: Number of dataOrQName slots in use */
  77   IntStack mark_doq_size=new IntStack();
  78   /** Tail-pruning mark: Number of namespace declaration sets in use
  79    * %REVIEW% I don't think number of NS sets is ever different from number
  80    * of NS elements. We can probabably reduce these to a single stack and save
  81    * some storage.
  82    * */
  83   IntStack mark_nsdeclset_size=new IntStack();
  84   /** Tail-pruning mark: Number of naespace declaration elements in use
  85    * %REVIEW% I don't think number of NS sets is ever different from number
  86    * of NS elements. We can probabably reduce these to a single stack and save
  87    * some storage.
  88    */
  89   IntStack mark_nsdeclelem_size=new IntStack();
  90 
  91   /**
  92    * Tail-pruning mark:  initial number of nodes in use
  93    */
  94   int m_emptyNodeCount;
  95 
  96   /**
  97    * Tail-pruning mark:  initial number of namespace declaration sets
  98    */
  99   int m_emptyNSDeclSetCount;
 100 
 101   /**
 102    * Tail-pruning mark:  initial number of namespace declaration elements
 103    */
 104   int m_emptyNSDeclSetElemsCount;
 105 
 106   /**
 107    * Tail-pruning mark:  initial number of data items in use
 108    */
 109   int m_emptyDataCount;
 110 
 111   /**
 112    * Tail-pruning mark:  initial number of characters in use
 113    */
 114   int m_emptyCharsCount;
 115 
 116   /**
 117    * Tail-pruning mark:  default initial number of dataOrQName slots in use
 118    */
 119   int m_emptyDataQNCount;
 120 
 121   public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
 122                  DTMWSFilter whiteSpaceFilter,
 123                  XMLStringFactory xstringfactory,
 124                  boolean doIndexing)
 125   {
 126     super(mgr, source, dtmIdentity, whiteSpaceFilter,
 127           xstringfactory, doIndexing);
 128 
 129     // NEVER track source locators for RTFs; they aren't meaningful. I think.
 130     // (If we did track them, we'd need to tail-prune these too.)
 131     //com.sun.org.apache.xalan.internal.processor.TransformerFactoryImpl.m_source_location;
 132     m_useSourceLocationProperty=false;
 133     m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector()
 134                                                      : null;
 135     m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null;
 136     m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null;
 137 
 138     // Record initial sizes of fields that are pushed and restored
 139     // for RTF tail-pruning.  More entries can be popped than pushed, so
 140     // we need this to mark the primordial state of the DTM.
 141     m_emptyNodeCount = m_size;
 142     m_emptyNSDeclSetCount = (m_namespaceDeclSets == null)
 143                                  ? 0 : m_namespaceDeclSets.size();
 144     m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null)
 145                                       ? 0 : m_namespaceDeclSetElements.size();
 146     m_emptyDataCount = m_data.size();
 147     m_emptyCharsCount = m_chars.size();
 148     m_emptyDataQNCount = m_dataOrQName.size();
 149   }
 150 
 151   /**
 152    * Given a DTM, find the owning document node. In the case of
 153    * SAX2RTFDTM, which may contain multiple documents, this returns
 154    * the <b>most recently started</b> document, or null if the DTM is
 155    * empty or no document is currently under construction.
 156    *
 157    * %REVIEW% Should we continue to report the most recent after
 158    * construction has ended? I think not, given that it may have been
 159    * tail-pruned.
 160    *
 161    *  @return int Node handle of Document node, or null if this DTM does not
 162    *  contain an "active" document.
 163    * */
 164   public int getDocument()
 165   {
 166     return makeNodeHandle(m_currentDocumentNode);
 167   }
 168 
 169   /**
 170    * Given a node handle, find the owning document node, using DTM semantics
 171    * (Document owns itself) rather than DOM semantics (Document has no owner).
 172    *
 173    * (I'm counting on the fact that getOwnerDocument() is implemented on top
 174    * of this call, in the superclass, to avoid having to rewrite that one.
 175    * Be careful if that code changes!)
 176    *
 177    * @param nodeHandle the id of the node.
 178    * @return int Node handle of owning document
 179    */
 180   public int getDocumentRoot(int nodeHandle)
 181   {
 182     for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) {
 183       if (_type(id)==DTM.DOCUMENT_NODE) {
 184         return makeNodeHandle(id);
 185       }
 186     }
 187 
 188     return DTM.NULL; // Safety net; should never happen
 189   }
 190 
 191   /**
 192    * Given a node identifier, find the owning document node.  Unlike the DOM,
 193    * this considers the owningDocument of a Document to be itself. Note that
 194    * in shared DTMs this may not be zero.
 195    *
 196    * @param nodeIdentifier the id of the starting node.
 197    * @return int Node identifier of the root of this DTM tree
 198    */
 199   protected int _documentRoot(int nodeIdentifier)
 200   {
 201     if(nodeIdentifier==NULL) return NULL;
 202 
 203     for (int parent=_parent(nodeIdentifier);
 204          parent!=NULL;
 205          nodeIdentifier=parent,parent=_parent(nodeIdentifier))
 206       ;
 207 
 208     return nodeIdentifier;
 209   }
 210 
 211   /**
 212    * Receive notification of the beginning of a new RTF document.
 213    *
 214    * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
 215    * might want to consider folding the start/endDocument changes back
 216    * into the main SAX2DTM so we don't have to expose so many fields
 217    * (even as Protected) and carry the additional code.
 218    *
 219    * @throws SAXException Any SAX exception, possibly
 220    *            wrapping another exception.
 221    * @see org.xml.sax.ContentHandler#startDocument
 222    * */
 223   public void startDocument() throws SAXException
 224   {
 225     // Re-initialize the tree append process
 226     m_endDocumentOccured = false;
 227     m_prefixMappings = new Vector<>();
 228     m_contextIndexes = new IntStack();
 229     m_parents = new IntStack();
 230 
 231     m_currentDocumentNode=m_size;
 232     super.startDocument();
 233   }
 234 
 235   /**
 236    * Receive notification of the end of the document.
 237    *
 238    * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
 239    * might want to consider folding the start/endDocument changes back
 240    * into the main SAX2DTM so we don't have to expose so many fields
 241    * (even as Protected).
 242    *
 243    * @throws SAXException Any SAX exception, possibly
 244    *            wrapping another exception.
 245    * @see org.xml.sax.ContentHandler#endDocument
 246    * */
 247   public void endDocument() throws SAXException
 248   {
 249     charactersFlush();
 250 
 251     m_nextsib.setElementAt(NULL,m_currentDocumentNode);
 252 
 253     if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
 254       m_firstch.setElementAt(NULL,m_currentDocumentNode);
 255 
 256     if (DTM.NULL != m_previous)
 257       m_nextsib.setElementAt(DTM.NULL,m_previous);
 258 
 259     m_parents = null;
 260     m_prefixMappings = null;
 261     m_contextIndexes = null;
 262 
 263     m_currentDocumentNode= NULL; // no longer open
 264     m_endDocumentOccured = true;
 265   }
 266 
 267 
 268   /** "Tail-pruning" support for RTFs.
 269    *
 270    * This function pushes information about the current size of the
 271    * DTM's data structures onto a stack, for use by popRewindMark()
 272    * (which see).
 273    *
 274    * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
 275    * RTFs will not be indexed, so I can simply panic if that case
 276    * arises. Hey, it works...
 277    * */
 278   public void pushRewindMark()
 279   {
 280     if(m_indexing || m_elemIndexes!=null)
 281       throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
 282 
 283     // Values from DTMDefaultBase
 284     // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
 285     mark_size.push(m_size);
 286     mark_nsdeclset_size.push((m_namespaceDeclSets==null)
 287                                    ? 0
 288                                    : m_namespaceDeclSets.size());
 289     mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null)
 290                                    ? 0
 291                                    : m_namespaceDeclSetElements.size());
 292 
 293     // Values from SAX2DTM
 294     mark_data_size.push(m_data.size());
 295     mark_char_size.push(m_chars.size());
 296     mark_doq_size.push(m_dataOrQName.size());
 297   }
 298 
 299   /** "Tail-pruning" support for RTFs.
 300    *
 301    * This function pops the information previously saved by
 302    * pushRewindMark (which see) and uses it to discard all nodes added
 303    * to the DTM after that time. We expect that this will allow us to
 304    * reuse storage more effectively.
 305    *
 306    * This is _not_ intended to be called while a document is still being
 307    * constructed -- only between endDocument and the next startDocument
 308    *
 309    * %REVIEW% WARNING: This is the first use of some of the truncation
 310    * methods.  If Xalan blows up after this is called, that's a likely
 311    * place to check.
 312    *
 313    * %REVIEW% Our original design for DTMs permitted them to share
 314    * string pools.  If there any risk that this might be happening, we
 315    * can _not_ rewind and recover the string storage. One solution
 316    * might to assert that DTMs used for RTFs Must Not take advantage
 317    * of that feature, but this seems excessively fragile. Another, much
 318    * less attractive, would be to just let them leak... Nah.
 319    *
 320    * @return true if and only if the pop completely emptied the
 321    * RTF. That response is used when determining how to unspool
 322    * RTF-started-while-RTF-open situations.
 323    * */
 324   public boolean popRewindMark()
 325   {
 326     boolean top=mark_size.empty();
 327 
 328     m_size=top ? m_emptyNodeCount : mark_size.pop();
 329     m_exptype.setSize(m_size);
 330     m_firstch.setSize(m_size);
 331     m_nextsib.setSize(m_size);
 332     m_prevsib.setSize(m_size);
 333     m_parent.setSize(m_size);
 334 
 335     m_elemIndexes=null;
 336 
 337     int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop();
 338     if (m_namespaceDeclSets!=null) {
 339       m_namespaceDeclSets.setSize(ds);
 340     }
 341 
 342     int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop();
 343     if (m_namespaceDeclSetElements!=null) {
 344       m_namespaceDeclSetElements.setSize(ds1);
 345     }
 346 
 347     // Values from SAX2DTM - m_data always has a reserved entry
 348     m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
 349     m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop());
 350     m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop());
 351 
 352     // Return true iff DTM now empty
 353     return m_size==0;
 354   }
 355 
 356   /** @return true if a DTM tree is currently under construction.
 357    * */
 358   public boolean isTreeIncomplete()
 359   {
 360     return !m_endDocumentOccured;
 361   }
 362 }