/*
 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.sun.org.apache.xml.internal.dtm.ref.sax2dtm;

import com.sun.org.apache.xml.internal.dtm.DTM;
import com.sun.org.apache.xml.internal.dtm.DTMManager;
import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
import com.sun.org.apache.xml.internal.utils.IntStack;
import com.sun.org.apache.xml.internal.utils.IntVector;
import com.sun.org.apache.xml.internal.utils.StringVector;
import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
import java.util.Vector;
import javax.xml.transform.Source;
import org.xml.sax.SAXException;

/**
 * A SAX2DTM variant adapted for Result Tree Fragments (RTFs). It differs
 * from its superclass in two ways:
 *
 * 1) Several XML trees may be appended to one DTM, so the root node of a
 *    given document is _not_ necessarily node 0. Some code had to be
 *    deoptimized to allow this, and an explicit way of obtaining the node
 *    handle of the root node is provided.
 *
 * 2) A stack of size marks is kept so that the most recently added trees
 *    can be "tail-pruned" off the end of the DTM as stylesheet elements
 *    (and thus variable contexts) are exited.
 *
 * PLEASE NOTE that this class may be _heavily_ dependent upon the
 * internals of the SAX2DTM superclass, and must be maintained in parallel
 * with that code. Arguably the two should be conditionals within a single
 * class, but they have been separated for performance reasons. (One could
 * even argue about which should be the superclass; the current arrangement
 * is as much about preserving the stability of existing code during
 * development as anything else.)
 *
 * %REVIEW% Since the differences are so minor, it may be possible and
 * practical to fold them back into the base SAX2DTM. Consider that as a
 * future code-size optimization.
 *
 * @LastModified: Oct 2017
 */
public class SAX2RTFDTM extends SAX2DTM {
  /** Set true to monitor SAX events and similar diagnostic info. */
  private static final boolean DEBUG = false;

  /**
   * Identity of the most recently started Document node, or NULL when no
   * document is open (initially, and again after endDocument()).
   */
  private int m_currentDocumentNode = NULL;

  /** Tail-pruning mark: saved values of m_size (number of nodes in use). */
  IntStack mark_size = new IntStack();

  /** Tail-pruning mark: saved sizes of m_data. */
  IntStack mark_data_size = new IntStack();

  /** Tail-pruning mark: saved sizes of m_chars. */
  IntStack mark_char_size = new IntStack();

  /** Tail-pruning mark: saved sizes of m_dataOrQName. */
  IntStack mark_doq_size = new IntStack();

  /**
   * Tail-pruning mark: saved sizes of m_namespaceDeclSets.
   * %REVIEW% The number of NS sets is probably never different from the
   * number of NS elements; the two stacks could likely be merged into one
   * to save some storage.
   */
  IntStack mark_nsdeclset_size = new IntStack();

  /**
   * Tail-pruning mark: saved sizes of m_namespaceDeclSetElements.
   * %REVIEW% The number of NS sets is probably never different from the
   * number of NS elements; the two stacks could likely be merged into one
   * to save some storage.
   */
  IntStack mark_nsdeclelem_size = new IntStack();

  /** Tail-pruning mark: node count recorded at construction time. */
  int m_emptyNodeCount;

  /**
   * Tail-pruning mark: number of namespace declaration sets recorded at
   * construction time.
   */
  int m_emptyNSDeclSetCount;

  /**
   * Tail-pruning mark: number of namespace declaration elements recorded
   * at construction time.
   */
  int m_emptyNSDeclSetElemsCount;

  /** Tail-pruning mark: size of m_data recorded at construction time. */
  int m_emptyDataCount;

  /** Tail-pruning mark: size of m_chars recorded at construction time. */
  int m_emptyCharsCount;

  /**
   * Tail-pruning mark: size of m_dataOrQName recorded at construction
   * time.
   */
  int m_emptyDataQNCount;

  /**
   * Builds an RTF-capable DTM. All arguments are passed straight through
   * to the SAX2DTM constructor; afterwards source-location tracking is
   * switched off and the "empty" sizes of the prunable structures are
   * recorded.
   *
   * @param mgr the DTMManager, forwarded to the superclass.
   * @param source the Source, forwarded to the superclass.
   * @param dtmIdentity the DTM identity, forwarded to the superclass.
   * @param whiteSpaceFilter the whitespace filter, forwarded to the
   *        superclass.
   * @param xstringfactory the XMLStringFactory, forwarded to the
   *        superclass.
   * @param doIndexing the indexing flag, forwarded to the superclass.
   */
  public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
                    DTMWSFilter whiteSpaceFilter,
                    XMLStringFactory xstringfactory,
                    boolean doIndexing) {
    super(mgr, source, dtmIdentity, whiteSpaceFilter,
          xstringfactory, doIndexing);

    // NEVER track source locators for RTFs; they aren't meaningful (and
    // if they were tracked, they would have to be tail-pruned as well).
    // With the property forced off, none of the locator vectors is needed.
    m_useSourceLocationProperty = false;
    m_sourceSystemId = null;
    m_sourceLine = null;
    m_sourceColumn = null;

    // Remember the primordial sizes of everything that gets pushed and
    // restored for tail-pruning. More entries can be popped than were
    // pushed, so popRewindMark() falls back on these values.
    int initialNSSets = 0;
    if (m_namespaceDeclSets != null) {
      initialNSSets = m_namespaceDeclSets.size();
    }
    int initialNSElems = 0;
    if (m_namespaceDeclSetElements != null) {
      initialNSElems = m_namespaceDeclSetElements.size();
    }

    m_emptyNodeCount = m_size;
    m_emptyNSDeclSetCount = initialNSSets;
    m_emptyNSDeclSetElemsCount = initialNSElems;
    m_emptyDataCount = m_data.size();
    m_emptyCharsCount = m_chars.size();
    m_emptyDataQNCount = m_dataOrQName.size();
  }

  /**
   * Find the owning document node of this DTM. Because a SAX2RTFDTM may
   * hold several documents, this reports the <b>most recently started</b>
   * one, i.e. the document that is currently under construction.
   *
   * %REVIEW% Should the most recent document still be reported once its
   * construction has ended? Probably not, given that it may have been
   * tail-pruned.
   *
   * @return int the result of makeNodeHandle() applied to the identity of
   * the currently open document; that identity is NULL when no document
   * is "active".
   */
  public int getDocument() {
    final int openDocument = m_currentDocumentNode;
    return makeNodeHandle(openDocument);
  }

  /**
   * Given a node handle, find the owning document node, using DTM
   * semantics (a Document owns itself) rather than DOM semantics (a
   * Document has no owner).
   *
   * (This relies on getOwnerDocument() being implemented on top of this
   * call in the superclass, which avoids rewriting that method here.
   * Be careful if that code changes!)
   *
   * @param nodeHandle the id of the node.
   * @return int Node handle of owning document
   */
  public int getDocumentRoot(int nodeHandle) {
    // Climb from the node itself towards the root until a Document node
    // is encountered.
    int identity = makeNodeIdentity(nodeHandle);
    while (identity != NULL) {
      if (_type(identity) == DTM.DOCUMENT_NODE) {
        return makeNodeHandle(identity);
      }
      identity = _parent(identity);
    }

    // Safety net; should never happen
    return DTM.NULL;
  }

  /**
   * Given a node identifier, find the owning document node. Unlike the
   * DOM, this considers the owning document of a Document to be itself.
   * Note that in shared DTMs the result may not be zero.
   *
   * @param nodeIdentifier the id of the starting node.
   * @return int Node identifier of the root of this DTM tree, or NULL if
   * the argument was NULL.
   */
  protected int _documentRoot(int nodeIdentifier) {
    if (nodeIdentifier == NULL) {
      return NULL;
    }

    // Follow parent links until a node without a parent is reached.
    int current = nodeIdentifier;
    int above = _parent(current);
    while (above != NULL) {
      current = above;
      above = _parent(current);
    }
    return current;
  }

  /**
   * Receive notification of the beginning of a new RTF document.
   *
   * %REVIEW% This isn't all that much of a deoptimization. It may be
   * worth folding the start/endDocument changes back into the main
   * SAX2DTM so that fewer fields have to be exposed (even as protected)
   * and less additional code has to be carried.
   *
   * @throws SAXException Any SAX exception, possibly
   *            wrapping another exception.
   * @see org.xml.sax.ContentHandler#startDocument
   */
  public void startDocument() throws SAXException {
    // Reset the tree-append state for the new document.
    m_parents = new IntStack();
    m_contextIndexes = new IntStack();
    m_prefixMappings = new Vector<>();
    m_endDocumentOccured = false;

    // The current node count is recorded as the new document's identity
    // before control passes to the superclass.
    m_currentDocumentNode = m_size;
    super.startDocument();
  }

  /**
   * Receive notification of the end of the document.
   *
   * %REVIEW% This isn't all that much of a deoptimization. It may be
   * worth folding the start/endDocument changes back into the main
   * SAX2DTM so that fewer fields have to be exposed (even as protected).
   *
   * @throws SAXException Any SAX exception, possibly
   *            wrapping another exception.
   * @see org.xml.sax.ContentHandler#endDocument
   */
  public void endDocument() throws SAXException {
    charactersFlush();

    final int docNode = m_currentDocumentNode;

    // The document node never has a following sibling.
    m_nextsib.setElementAt(NULL, docNode);

    // A first-child slot still flagged NOTPROCESSED means no child was
    // ever recorded for the document.
    if (m_firstch.elementAt(docNode) == NOTPROCESSED) {
      m_firstch.setElementAt(NULL, docNode);
    }

    // Terminate the sibling chain at the last node that was added.
    if (m_previous != DTM.NULL) {
      m_nextsib.setElementAt(DTM.NULL, m_previous);
    }

    // Drop the construction-time helpers.
    m_contextIndexes = null;
    m_prefixMappings = null;
    m_parents = null;

    m_currentDocumentNode = NULL; // no longer open
    m_endDocumentOccured = true;
  }

  /**
   * "Tail-pruning" support for RTFs.
   *
   * Pushes the current sizes of the DTM's data structures onto the mark
   * stacks, for later use by popRewindMark() (which see).
   *
   * %REVIEW% There is no known way to rewind m_elemIndexes. However,
   * RTFs will not be indexed, so this method simply refuses to work on
   * an indexed DTM.
   *
   * @throws NullPointerException if this DTM is indexing or already has
   *         element indexes.
   */
  public void pushRewindMark() {
    final boolean indexed = m_indexing || m_elemIndexes != null;
    if (indexed) {
      throw new NullPointerException(
          "Coding error; Don't try to mark/rewind an indexed DTM");
    }

    // Values from DTMDefaultBase
    // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
    mark_size.push(m_size);

    int nsSetCount = 0;
    if (m_namespaceDeclSets != null) {
      nsSetCount = m_namespaceDeclSets.size();
    }
    mark_nsdeclset_size.push(nsSetCount);

    int nsElemCount = 0;
    if (m_namespaceDeclSetElements != null) {
      nsElemCount = m_namespaceDeclSetElements.size();
    }
    mark_nsdeclelem_size.push(nsElemCount);

    // Values from SAX2DTM
    mark_data_size.push(m_data.size());
    mark_char_size.push(m_chars.size());
    mark_doq_size.push(m_dataOrQName.size());
  }

  /**
   * "Tail-pruning" support for RTFs.
   *
   * Pops the sizes previously saved by pushRewindMark() (which see) and
   * truncates the DTM's structures back to them, discarding every node
   * added since that mark. If no mark is left, the sizes recorded by the
   * constructor are used instead. The expectation is that this allows
   * storage to be reused more effectively.
   *
   * This is _not_ intended to be called while a document is still being
   * constructed -- only between endDocument and the next startDocument.
   *
   * %REVIEW% WARNING: This is the first use of some of the truncation
   * methods. If Xalan blows up after this is called, that's a likely
   * place to check.
   *
   * %REVIEW% The original design for DTMs permitted them to share string
   * pools. If there is any risk that this might be happening, the string
   * storage can _not_ be rewound and recovered. One solution might be to
   * assert that DTMs used for RTFs must not take advantage of that
   * feature, but this seems excessively fragile. Another, much less
   * attractive, would be to just let them leak.
   *
   * @return true if and only if the pop completely emptied the RTF, that
   * is, the node count after rewinding is zero. That response is used
   * when determining how to unspool RTF-started-while-RTF-open
   * situations.
   */
  public boolean popRewindMark() {
    // No saved mark left: fall back on the construction-time sizes.
    final boolean noMarks = mark_size.empty();

    if (noMarks) {
      m_size = m_emptyNodeCount;
    } else {
      m_size = mark_size.pop();
    }
    m_exptype.setSize(m_size);
    m_firstch.setSize(m_size);
    m_nextsib.setSize(m_size);
    m_prevsib.setSize(m_size);
    m_parent.setSize(m_size);

    m_elemIndexes = null;

    // The mark is popped even when there is no structure to truncate.
    final int nsSetCount;
    if (noMarks) {
      nsSetCount = m_emptyNSDeclSetCount;
    } else {
      nsSetCount = mark_nsdeclset_size.pop();
    }
    if (m_namespaceDeclSets != null) {
      m_namespaceDeclSets.setSize(nsSetCount);
    }

    final int nsElemCount;
    if (noMarks) {
      nsElemCount = m_emptyNSDeclSetElemsCount;
    } else {
      nsElemCount = mark_nsdeclelem_size.pop();
    }
    if (m_namespaceDeclSetElements != null) {
      m_namespaceDeclSetElements.setSize(nsElemCount);
    }

    // Values from SAX2DTM - m_data always has a reserved entry
    if (noMarks) {
      m_data.setSize(m_emptyDataCount);
    } else {
      m_data.setSize(mark_data_size.pop());
    }

    if (noMarks) {
      m_chars.setLength(m_emptyCharsCount);
    } else {
      m_chars.setLength(mark_char_size.pop());
    }

    if (noMarks) {
      m_dataOrQName.setSize(m_emptyDataQNCount);
    } else {
      m_dataOrQName.setSize(mark_doq_size.pop());
    }

    // Return true iff DTM now empty
    return m_size == 0;
  }

  /**
   * @return true if a DTM tree is currently under construction, i.e. the
   * end-of-document flag is not set.
   */
  public boolean isTreeIncomplete() {
    if (m_endDocumentOccured) {
      return false;
    }
    return true;
  }
}