1 /* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 package com.sun.org.apache.xml.internal.serializer; 21 22 import com.sun.org.apache.xalan.internal.utils.SecuritySupport; 23 import java.io.File; 24 25 import com.sun.org.apache.xml.internal.serializer.utils.AttList; 26 import com.sun.org.apache.xml.internal.serializer.utils.DOM2Helper; 27 import org.w3c.dom.Comment; 28 import org.w3c.dom.Element; 29 import org.w3c.dom.EntityReference; 30 import org.w3c.dom.NamedNodeMap; 31 import org.w3c.dom.Node; 32 import org.w3c.dom.ProcessingInstruction; 33 import org.w3c.dom.Text; 34 35 import org.xml.sax.ContentHandler; 36 import org.xml.sax.Locator; 37 import org.xml.sax.ext.LexicalHandler; 38 import org.xml.sax.helpers.LocatorImpl; 39 40 /** 41 * This class does a pre-order walk of the DOM tree, calling a ContentHandler 42 * interface as it goes. 43 * 44 * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. 45 * It exists to cut the serializers dependancy on that package. 46 * 47 * @xsl.usage internal 48 */ 49 50 public final class TreeWalker 51 { 52 53 /** Local reference to a ContentHandler */ 54 final private ContentHandler m_contentHandler; 55 /** 56 * If m_contentHandler is a SerializationHandler, then this is 57 * a reference to the same object. 58 */ 59 final private SerializationHandler m_Serializer; 60 61 // ARGHH!! JAXP Uses Xerces without setting the namespace processing to ON! 62 // DOM2Helper m_dh = new DOM2Helper(); 63 64 /** DomHelper for this TreeWalker */ 65 final protected DOM2Helper m_dh; 66 67 /** Locator object for this TreeWalker */ 68 final private LocatorImpl m_locator = new LocatorImpl(); 69 70 /** 71 * Get the ContentHandler used for the tree walk. 72 * 73 * @return the ContentHandler used for the tree walk 74 */ 75 public ContentHandler getContentHandler() 76 { 77 return m_contentHandler; 78 } 79 80 public TreeWalker(ContentHandler ch) { 81 this(ch,null); 82 } 83 /** 84 * Constructor. 85 * @param contentHandler The implemention of the 86 * contentHandler operation (toXMLString, digest, ...) 87 */ 88 public TreeWalker(ContentHandler contentHandler, String systemId) 89 { 90 // Set the content handler 91 m_contentHandler = contentHandler; 92 if (m_contentHandler instanceof SerializationHandler) { 93 m_Serializer = (SerializationHandler) m_contentHandler; 94 } else { 95 m_Serializer = null; 96 } 97 // Set the system ID, if it is given 98 m_contentHandler.setDocumentLocator(m_locator); 99 if (systemId != null) { 100 m_locator.setSystemId(systemId); 101 } 102 103 m_dh = new DOM2Helper(); 104 } 105 106 /** 107 * Perform a pre-order traversal non-recursive style. 108 * 109 * Note that TreeWalker assumes that the subtree is intended to represent 110 * a complete (though not necessarily well-formed) document and, during a 111 * traversal, startDocument and endDocument will always be issued to the 112 * SAX listener. 113 * 114 * @param pos Node in the tree where to start traversal 115 * 116 * @throws TransformerException 117 */ 118 public void traverse(Node pos) throws org.xml.sax.SAXException 119 { 120 121 this.m_contentHandler.startDocument(); 122 123 Node top = pos; 124 125 while (null != pos) 126 { 127 startNode(pos); 128 129 Node nextNode = pos.getFirstChild(); 130 131 while (null == nextNode) 132 { 133 endNode(pos); 134 135 if (top.equals(pos)) 136 break; 137 138 nextNode = pos.getNextSibling(); 139 140 if (null == nextNode) 141 { 142 pos = pos.getParentNode(); 143 144 if ((null == pos) || (top.equals(pos))) 145 { 146 if (null != pos) 147 endNode(pos); 148 149 nextNode = null; 150 151 break; 152 } 153 } 154 } 155 156 pos = nextNode; 157 } 158 this.m_contentHandler.endDocument(); 159 } 160 161 /** 162 * Perform a pre-order traversal non-recursive style. 163 164 * Note that TreeWalker assumes that the subtree is intended to represent 165 * a complete (though not necessarily well-formed) document and, during a 166 * traversal, startDocument and endDocument will always be issued to the 167 * SAX listener. 168 * 169 * @param pos Node in the tree where to start traversal 170 * @param top Node in the tree where to end traversal 171 * 172 * @throws TransformerException 173 */ 174 public void traverse(Node pos, Node top) throws org.xml.sax.SAXException 175 { 176 177 this.m_contentHandler.startDocument(); 178 179 while (null != pos) 180 { 181 startNode(pos); 182 183 Node nextNode = pos.getFirstChild(); 184 185 while (null == nextNode) 186 { 187 endNode(pos); 188 189 if ((null != top) && top.equals(pos)) 190 break; 191 192 nextNode = pos.getNextSibling(); 193 194 if (null == nextNode) 195 { 196 pos = pos.getParentNode(); 197 198 if ((null == pos) || ((null != top) && top.equals(pos))) 199 { 200 nextNode = null; 201 202 break; 203 } 204 } 205 } 206 207 pos = nextNode; 208 } 209 this.m_contentHandler.endDocument(); 210 } 211 212 /** Flag indicating whether following text to be processed is raw text */ 213 boolean nextIsRaw = false; 214 215 /** 216 * Optimized dispatch of characters. 217 */ 218 private final void dispatachChars(Node node) 219 throws org.xml.sax.SAXException 220 { 221 if(m_Serializer != null) 222 { 223 this.m_Serializer.characters(node); 224 } 225 else 226 { 227 String data = ((Text) node).getData(); 228 this.m_contentHandler.characters(data.toCharArray(), 0, data.length()); 229 } 230 } 231 232 /** 233 * Start processing given node 234 * 235 * 236 * @param node Node to process 237 * 238 * @throws org.xml.sax.SAXException 239 */ 240 protected void startNode(Node node) throws org.xml.sax.SAXException 241 { 242 243 // TODO: <REVIEW> 244 // A Serializer implements ContentHandler, but not NodeConsumer 245 // so drop this reference to NodeConsumer which would otherwise 246 // pull in all sorts of things 247 // if (m_contentHandler instanceof NodeConsumer) 248 // { 249 // ((NodeConsumer) m_contentHandler).setOriginatingNode(node); 250 // } 251 // TODO: </REVIEW> 252 253 if (node instanceof Locator) 254 { 255 Locator loc = (Locator)node; 256 m_locator.setColumnNumber(loc.getColumnNumber()); 257 m_locator.setLineNumber(loc.getLineNumber()); 258 m_locator.setPublicId(loc.getPublicId()); 259 m_locator.setSystemId(loc.getSystemId()); 260 } 261 else 262 { 263 m_locator.setColumnNumber(0); 264 m_locator.setLineNumber(0); 265 } 266 267 switch (node.getNodeType()) 268 { 269 case Node.COMMENT_NODE : 270 { 271 String data = ((Comment) node).getData(); 272 273 if (m_contentHandler instanceof LexicalHandler) 274 { 275 LexicalHandler lh = ((LexicalHandler) this.m_contentHandler); 276 277 lh.comment(data.toCharArray(), 0, data.length()); 278 } 279 } 280 break; 281 case Node.DOCUMENT_FRAGMENT_NODE : 282 283 // ??; 284 break; 285 case Node.DOCUMENT_NODE : 286 287 break; 288 case Node.ELEMENT_NODE : 289 Element elem_node = (Element) node; 290 { 291 // Make sure the namespace node 292 // for the element itself is declared 293 // to the ContentHandler 294 String uri = elem_node.getNamespaceURI(); 295 if (uri != null) { 296 String prefix = elem_node.getPrefix(); 297 if (prefix==null) 298 prefix=""; 299 this.m_contentHandler.startPrefixMapping(prefix,uri); 300 } 301 } 302 NamedNodeMap atts = elem_node.getAttributes(); 303 int nAttrs = atts.getLength(); 304 // System.out.println("TreeWalker#startNode: "+node.getNodeName()); 305 306 307 // Make sure the namespace node of 308 // each attribute is declared to the ContentHandler 309 for (int i = 0; i < nAttrs; i++) 310 { 311 final Node attr = atts.item(i); 312 final String attrName = attr.getNodeName(); 313 final int colon = attrName.indexOf(':'); 314 final String prefix; 315 316 // System.out.println("TreeWalker#startNode: attr["+i+"] = "+attrName+", "+attr.getNodeValue()); 317 if (attrName.equals("xmlns") || attrName.startsWith("xmlns:")) 318 { 319 // Use "" instead of null, as Xerces likes "" for the 320 // name of the default namespace. Fix attributed 321 // to "Steven Murray" <smurray@ebt.com>. 322 if (colon < 0) 323 prefix = ""; 324 else 325 prefix = attrName.substring(colon + 1); 326 327 this.m_contentHandler.startPrefixMapping(prefix, 328 attr.getNodeValue()); 329 } 330 else if (colon > 0) { 331 prefix = attrName.substring(0,colon); 332 String uri = attr.getNamespaceURI(); 333 if (uri != null) 334 this.m_contentHandler.startPrefixMapping(prefix,uri); 335 } 336 } 337 338 String ns = m_dh.getNamespaceOfNode(node); 339 if(null == ns) 340 ns = ""; 341 this.m_contentHandler.startElement(ns, 342 m_dh.getLocalNameOfNode(node), 343 node.getNodeName(), 344 new AttList(atts, m_dh)); 345 break; 346 case Node.PROCESSING_INSTRUCTION_NODE : 347 { 348 ProcessingInstruction pi = (ProcessingInstruction) node; 349 String name = pi.getNodeName(); 350 351 // String data = pi.getData(); 352 if (name.equals("xslt-next-is-raw")) 353 { 354 nextIsRaw = true; 355 } 356 else 357 { 358 this.m_contentHandler.processingInstruction(pi.getNodeName(), 359 pi.getData()); 360 } 361 } 362 break; 363 case Node.CDATA_SECTION_NODE : 364 { 365 boolean isLexH = (m_contentHandler instanceof LexicalHandler); 366 LexicalHandler lh = isLexH 367 ? ((LexicalHandler) this.m_contentHandler) : null; 368 369 if (isLexH) 370 { 371 lh.startCDATA(); 372 } 373 374 dispatachChars(node); 375 376 { 377 if (isLexH) 378 { 379 lh.endCDATA(); 380 } 381 } 382 } 383 break; 384 case Node.TEXT_NODE : 385 { 386 //String data = ((Text) node).getData(); 387 388 if (nextIsRaw) 389 { 390 nextIsRaw = false; 391 392 m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_DISABLE_OUTPUT_ESCAPING, ""); 393 dispatachChars(node); 394 m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_ENABLE_OUTPUT_ESCAPING, ""); 395 } 396 else 397 { 398 dispatachChars(node); 399 } 400 } 401 break; 402 case Node.ENTITY_REFERENCE_NODE : 403 { 404 EntityReference eref = (EntityReference) node; 405 406 if (m_contentHandler instanceof LexicalHandler) 407 { 408 ((LexicalHandler) this.m_contentHandler).startEntity( 409 eref.getNodeName()); 410 } 411 else 412 { 413 414 // warning("Can not output entity to a pure SAX ContentHandler"); 415 } 416 } 417 break; 418 default : 419 } 420 } 421 422 /** 423 * End processing of given node 424 * 425 * 426 * @param node Node we just finished processing 427 * 428 * @throws org.xml.sax.SAXException 429 */ 430 protected void endNode(Node node) throws org.xml.sax.SAXException 431 { 432 433 switch (node.getNodeType()) 434 { 435 case Node.DOCUMENT_NODE : 436 break; 437 438 case Node.ELEMENT_NODE : 439 String ns = m_dh.getNamespaceOfNode(node); 440 if(null == ns) 441 ns = ""; 442 this.m_contentHandler.endElement(ns, 443 m_dh.getLocalNameOfNode(node), 444 node.getNodeName()); 445 446 if (m_Serializer == null) { 447 // Don't bother with endPrefixMapping calls if the ContentHandler is a 448 // SerializationHandler because SerializationHandler's ignore the 449 // endPrefixMapping() calls anyways. . . . This is an optimization. 450 Element elem_node = (Element) node; 451 NamedNodeMap atts = elem_node.getAttributes(); 452 int nAttrs = atts.getLength(); 453 454 // do the endPrefixMapping calls in reverse order 455 // of the startPrefixMapping calls 456 for (int i = (nAttrs-1); 0 <= i; i--) 457 { 458 final Node attr = atts.item(i); 459 final String attrName = attr.getNodeName(); 460 final int colon = attrName.indexOf(':'); 461 final String prefix; 462 463 if (attrName.equals("xmlns") || attrName.startsWith("xmlns:")) 464 { 465 // Use "" instead of null, as Xerces likes "" for the 466 // name of the default namespace. Fix attributed 467 // to "Steven Murray" <smurray@ebt.com>. 468 if (colon < 0) 469 prefix = ""; 470 else 471 prefix = attrName.substring(colon + 1); 472 473 this.m_contentHandler.endPrefixMapping(prefix); 474 } 475 else if (colon > 0) { 476 prefix = attrName.substring(0, colon); 477 this.m_contentHandler.endPrefixMapping(prefix); 478 } 479 } 480 { 481 String uri = elem_node.getNamespaceURI(); 482 if (uri != null) { 483 String prefix = elem_node.getPrefix(); 484 if (prefix==null) 485 prefix=""; 486 this.m_contentHandler.endPrefixMapping(prefix); 487 } 488 } 489 } 490 break; 491 case Node.CDATA_SECTION_NODE : 492 break; 493 case Node.ENTITY_REFERENCE_NODE : 494 { 495 EntityReference eref = (EntityReference) node; 496 497 if (m_contentHandler instanceof LexicalHandler) 498 { 499 LexicalHandler lh = ((LexicalHandler) this.m_contentHandler); 500 501 lh.endEntity(eref.getNodeName()); 502 } 503 } 504 break; 505 default : 506 } 507 } 508 } //TreeWalker