1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 package com.sun.org.apache.xml.internal.serializer;
  21 
  22 import com.sun.org.apache.xalan.internal.utils.SecuritySupport;
  23 import java.io.File;
  24 
  25 import com.sun.org.apache.xml.internal.serializer.utils.AttList;
  26 import com.sun.org.apache.xml.internal.serializer.utils.DOM2Helper;
  27 import org.w3c.dom.Comment;
  28 import org.w3c.dom.Element;
  29 import org.w3c.dom.EntityReference;
  30 import org.w3c.dom.NamedNodeMap;
  31 import org.w3c.dom.Node;
  32 import org.w3c.dom.ProcessingInstruction;
  33 import org.w3c.dom.Text;
  34 
  35 import org.xml.sax.ContentHandler;
  36 import org.xml.sax.Locator;
  37 import org.xml.sax.ext.LexicalHandler;
  38 import org.xml.sax.helpers.LocatorImpl;
  39 
  40 /**
  41  * This class does a pre-order walk of the DOM tree, calling a ContentHandler
  42  * interface as it goes.
  43  *
  44  * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
  45  * It exists to cut the serializers dependancy on that package.
  46  *
  47  * @xsl.usage internal
  48  */
  49 
  50 public final class TreeWalker
  51 {
  52 
  53   /** Local reference to a ContentHandler          */
  54   final private ContentHandler m_contentHandler;
  55   /**
  56    * If m_contentHandler is a SerializationHandler, then this is
  57    * a reference to the same object.
  58    */
  59   final private SerializationHandler m_Serializer;
  60 
  61   // ARGHH!!  JAXP Uses Xerces without setting the namespace processing to ON!
  62   // DOM2Helper m_dh = new DOM2Helper();
  63 
  64   /** DomHelper for this TreeWalker          */
  65   final protected DOM2Helper m_dh;
  66 
  67   /** Locator object for this TreeWalker          */
  68   final private LocatorImpl m_locator = new LocatorImpl();
  69 
  70   /**
  71    * Get the ContentHandler used for the tree walk.
  72    *
  73    * @return the ContentHandler used for the tree walk
  74    */
  75   public ContentHandler getContentHandler()
  76   {
  77     return m_contentHandler;
  78   }
  79 
  80   public TreeWalker(ContentHandler ch) {
  81       this(ch,null);
  82   }
  83   /**
  84    * Constructor.
  85    * @param   contentHandler The implemention of the
  86    * contentHandler operation (toXMLString, digest, ...)
  87    */
  88   public TreeWalker(ContentHandler contentHandler, String systemId)
  89   {
  90       // Set the content handler
  91       m_contentHandler = contentHandler;
  92       if (m_contentHandler instanceof SerializationHandler) {
  93           m_Serializer = (SerializationHandler) m_contentHandler;
  94       } else {
  95           m_Serializer = null;
  96       }
  97       // Set the system ID, if it is given
  98       m_contentHandler.setDocumentLocator(m_locator);
  99       if (systemId != null) {
 100           m_locator.setSystemId(systemId);
 101       }
 102 
 103       m_dh = new DOM2Helper();
 104   }
 105 
 106   /**
 107    * Perform a pre-order traversal non-recursive style.
 108    *
 109    * Note that TreeWalker assumes that the subtree is intended to represent
 110    * a complete (though not necessarily well-formed) document and, during a
 111    * traversal, startDocument and endDocument will always be issued to the
 112    * SAX listener.
 113    *
 114    * @param pos Node in the tree where to start traversal
 115    *
 116    * @throws TransformerException
 117    */
 118   public void traverse(Node pos) throws org.xml.sax.SAXException
 119   {
 120 
 121     this.m_contentHandler.startDocument();
 122 
 123     Node top = pos;
 124 
 125     while (null != pos)
 126     {
 127       startNode(pos);
 128 
 129       Node nextNode = pos.getFirstChild();
 130 
 131       while (null == nextNode)
 132       {
 133         endNode(pos);
 134 
 135         if (top.equals(pos))
 136           break;
 137 
 138         nextNode = pos.getNextSibling();
 139 
 140         if (null == nextNode)
 141         {
 142           pos = pos.getParentNode();
 143 
 144           if ((null == pos) || (top.equals(pos)))
 145           {
 146             if (null != pos)
 147               endNode(pos);
 148 
 149             nextNode = null;
 150 
 151             break;
 152           }
 153         }
 154       }
 155 
 156       pos = nextNode;
 157     }
 158     this.m_contentHandler.endDocument();
 159   }
 160 
 161   /**
 162    * Perform a pre-order traversal non-recursive style.
 163 
 164    * Note that TreeWalker assumes that the subtree is intended to represent
 165    * a complete (though not necessarily well-formed) document and, during a
 166    * traversal, startDocument and endDocument will always be issued to the
 167    * SAX listener.
 168    *
 169    * @param pos Node in the tree where to start traversal
 170    * @param top Node in the tree where to end traversal
 171    *
 172    * @throws TransformerException
 173    */
 174   public void traverse(Node pos, Node top) throws org.xml.sax.SAXException
 175   {
 176 
 177     this.m_contentHandler.startDocument();
 178 
 179     while (null != pos)
 180     {
 181       startNode(pos);
 182 
 183       Node nextNode = pos.getFirstChild();
 184 
 185       while (null == nextNode)
 186       {
 187         endNode(pos);
 188 
 189         if ((null != top) && top.equals(pos))
 190           break;
 191 
 192         nextNode = pos.getNextSibling();
 193 
 194         if (null == nextNode)
 195         {
 196           pos = pos.getParentNode();
 197 
 198           if ((null == pos) || ((null != top) && top.equals(pos)))
 199           {
 200             nextNode = null;
 201 
 202             break;
 203           }
 204         }
 205       }
 206 
 207       pos = nextNode;
 208     }
 209     this.m_contentHandler.endDocument();
 210   }
 211 
 212   /** Flag indicating whether following text to be processed is raw text          */
 213   boolean nextIsRaw = false;
 214 
 215   /**
 216    * Optimized dispatch of characters.
 217    */
 218   private final void dispatachChars(Node node)
 219      throws org.xml.sax.SAXException
 220   {
 221     if(m_Serializer != null)
 222     {
 223       this.m_Serializer.characters(node);
 224     }
 225     else
 226     {
 227       String data = ((Text) node).getData();
 228       this.m_contentHandler.characters(data.toCharArray(), 0, data.length());
 229     }
 230   }
 231 
 232   /**
 233    * Start processing given node
 234    *
 235    *
 236    * @param node Node to process
 237    *
 238    * @throws org.xml.sax.SAXException
 239    */
 240   protected void startNode(Node node) throws org.xml.sax.SAXException
 241   {
 242 
 243 //   TODO: <REVIEW>
 244 //    A Serializer implements ContentHandler, but not NodeConsumer
 245 //    so drop this reference to NodeConsumer which would otherwise
 246 //    pull in all sorts of things
 247 //    if (m_contentHandler instanceof NodeConsumer)
 248 //    {
 249 //      ((NodeConsumer) m_contentHandler).setOriginatingNode(node);
 250 //    }
 251 //    TODO: </REVIEW>
 252 
 253                 if (node instanceof Locator)
 254                 {
 255                         Locator loc = (Locator)node;
 256                         m_locator.setColumnNumber(loc.getColumnNumber());
 257                         m_locator.setLineNumber(loc.getLineNumber());
 258                         m_locator.setPublicId(loc.getPublicId());
 259                         m_locator.setSystemId(loc.getSystemId());
 260                 }
 261                 else
 262                 {
 263                         m_locator.setColumnNumber(0);
 264       m_locator.setLineNumber(0);
 265                 }
 266 
 267     switch (node.getNodeType())
 268     {
 269     case Node.COMMENT_NODE :
 270     {
 271       String data = ((Comment) node).getData();
 272 
 273       if (m_contentHandler instanceof LexicalHandler)
 274       {
 275         LexicalHandler lh = ((LexicalHandler) this.m_contentHandler);
 276 
 277         lh.comment(data.toCharArray(), 0, data.length());
 278       }
 279     }
 280     break;
 281     case Node.DOCUMENT_FRAGMENT_NODE :
 282 
 283       // ??;
 284       break;
 285     case Node.DOCUMENT_NODE :
 286 
 287       break;
 288     case Node.ELEMENT_NODE :
 289       Element elem_node = (Element) node;
 290       {
 291           // Make sure the namespace node
 292           // for the element itself is declared
 293           // to the ContentHandler
 294           String uri = elem_node.getNamespaceURI();
 295           if (uri != null) {
 296               String prefix = elem_node.getPrefix();
 297               if (prefix==null)
 298                 prefix="";
 299               this.m_contentHandler.startPrefixMapping(prefix,uri);
 300           }
 301       }
 302       NamedNodeMap atts = elem_node.getAttributes();
 303       int nAttrs = atts.getLength();
 304       // System.out.println("TreeWalker#startNode: "+node.getNodeName());
 305 
 306 
 307       // Make sure the namespace node of
 308       // each attribute is declared to the ContentHandler
 309       for (int i = 0; i < nAttrs; i++)
 310       {
 311         final Node attr = atts.item(i);
 312         final String attrName = attr.getNodeName();
 313         final int colon = attrName.indexOf(':');
 314         final String prefix;
 315 
 316         // System.out.println("TreeWalker#startNode: attr["+i+"] = "+attrName+", "+attr.getNodeValue());
 317         if (attrName.equals("xmlns") || attrName.startsWith("xmlns:"))
 318         {
 319           // Use "" instead of null, as Xerces likes "" for the
 320           // name of the default namespace.  Fix attributed
 321           // to "Steven Murray" <smurray@ebt.com>.
 322           if (colon < 0)
 323             prefix = "";
 324           else
 325             prefix = attrName.substring(colon + 1);
 326 
 327           this.m_contentHandler.startPrefixMapping(prefix,
 328                                                    attr.getNodeValue());
 329         }
 330         else if (colon > 0) {
 331             prefix = attrName.substring(0,colon);
 332             String uri = attr.getNamespaceURI();
 333             if (uri != null)
 334                 this.m_contentHandler.startPrefixMapping(prefix,uri);
 335         }
 336       }
 337 
 338       String ns = m_dh.getNamespaceOfNode(node);
 339       if(null == ns)
 340         ns = "";
 341       this.m_contentHandler.startElement(ns,
 342                                          m_dh.getLocalNameOfNode(node),
 343                                          node.getNodeName(),
 344                                          new AttList(atts, m_dh));
 345       break;
 346     case Node.PROCESSING_INSTRUCTION_NODE :
 347     {
 348       ProcessingInstruction pi = (ProcessingInstruction) node;
 349       String name = pi.getNodeName();
 350 
 351       // String data = pi.getData();
 352       if (name.equals("xslt-next-is-raw"))
 353       {
 354         nextIsRaw = true;
 355       }
 356       else
 357       {
 358         this.m_contentHandler.processingInstruction(pi.getNodeName(),
 359                                                     pi.getData());
 360       }
 361     }
 362     break;
 363     case Node.CDATA_SECTION_NODE :
 364     {
 365       boolean isLexH = (m_contentHandler instanceof LexicalHandler);
 366       LexicalHandler lh = isLexH
 367                           ? ((LexicalHandler) this.m_contentHandler) : null;
 368 
 369       if (isLexH)
 370       {
 371         lh.startCDATA();
 372       }
 373 
 374       dispatachChars(node);
 375 
 376       {
 377         if (isLexH)
 378         {
 379           lh.endCDATA();
 380         }
 381       }
 382     }
 383     break;
 384     case Node.TEXT_NODE :
 385     {
 386       //String data = ((Text) node).getData();
 387 
 388       if (nextIsRaw)
 389       {
 390         nextIsRaw = false;
 391 
 392         m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_DISABLE_OUTPUT_ESCAPING, "");
 393         dispatachChars(node);
 394         m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_ENABLE_OUTPUT_ESCAPING, "");
 395       }
 396       else
 397       {
 398         dispatachChars(node);
 399       }
 400     }
 401     break;
 402     case Node.ENTITY_REFERENCE_NODE :
 403     {
 404       EntityReference eref = (EntityReference) node;
 405 
 406       if (m_contentHandler instanceof LexicalHandler)
 407       {
 408         ((LexicalHandler) this.m_contentHandler).startEntity(
 409           eref.getNodeName());
 410       }
 411       else
 412       {
 413 
 414         // warning("Can not output entity to a pure SAX ContentHandler");
 415       }
 416     }
 417     break;
 418     default :
 419     }
 420   }
 421 
 422   /**
 423    * End processing of given node
 424    *
 425    *
 426    * @param node Node we just finished processing
 427    *
 428    * @throws org.xml.sax.SAXException
 429    */
 430   protected void endNode(Node node) throws org.xml.sax.SAXException
 431   {
 432 
 433     switch (node.getNodeType())
 434     {
 435     case Node.DOCUMENT_NODE :
 436       break;
 437 
 438     case Node.ELEMENT_NODE :
 439       String ns = m_dh.getNamespaceOfNode(node);
 440       if(null == ns)
 441         ns = "";
 442       this.m_contentHandler.endElement(ns,
 443                                          m_dh.getLocalNameOfNode(node),
 444                                          node.getNodeName());
 445 
 446       if (m_Serializer == null) {
 447       // Don't bother with endPrefixMapping calls if the ContentHandler is a
 448       // SerializationHandler because SerializationHandler's ignore the
 449       // endPrefixMapping() calls anyways. . . .  This is an optimization.
 450       Element elem_node = (Element) node;
 451       NamedNodeMap atts = elem_node.getAttributes();
 452       int nAttrs = atts.getLength();
 453 
 454       // do the endPrefixMapping calls in reverse order
 455       // of the startPrefixMapping calls
 456       for (int i = (nAttrs-1); 0 <= i; i--)
 457       {
 458         final Node attr = atts.item(i);
 459         final String attrName = attr.getNodeName();
 460         final int colon = attrName.indexOf(':');
 461         final String prefix;
 462 
 463         if (attrName.equals("xmlns") || attrName.startsWith("xmlns:"))
 464         {
 465           // Use "" instead of null, as Xerces likes "" for the
 466           // name of the default namespace.  Fix attributed
 467           // to "Steven Murray" <smurray@ebt.com>.
 468           if (colon < 0)
 469             prefix = "";
 470           else
 471             prefix = attrName.substring(colon + 1);
 472 
 473           this.m_contentHandler.endPrefixMapping(prefix);
 474         }
 475         else if (colon > 0) {
 476             prefix = attrName.substring(0, colon);
 477             this.m_contentHandler.endPrefixMapping(prefix);
 478         }
 479       }
 480       {
 481           String uri = elem_node.getNamespaceURI();
 482           if (uri != null) {
 483               String prefix = elem_node.getPrefix();
 484               if (prefix==null)
 485                 prefix="";
 486               this.m_contentHandler.endPrefixMapping(prefix);
 487           }
 488       }
 489       }
 490       break;
 491     case Node.CDATA_SECTION_NODE :
 492       break;
 493     case Node.ENTITY_REFERENCE_NODE :
 494     {
 495       EntityReference eref = (EntityReference) node;
 496 
 497       if (m_contentHandler instanceof LexicalHandler)
 498       {
 499         LexicalHandler lh = ((LexicalHandler) this.m_contentHandler);
 500 
 501         lh.endEntity(eref.getNodeName());
 502       }
 503     }
 504     break;
 505     default :
 506     }
 507   }
 508 }  //TreeWalker