1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 package com.sun.org.apache.xml.internal.utils;
  21 
  22 import com.sun.org.apache.xalan.internal.utils.SecuritySupport;
  23 import java.io.File;
  24 
  25 import org.w3c.dom.Comment;
  26 import org.w3c.dom.Element;
  27 import org.w3c.dom.EntityReference;
  28 import org.w3c.dom.NamedNodeMap;
  29 import org.w3c.dom.Node;
  30 import org.w3c.dom.ProcessingInstruction;
  31 import org.w3c.dom.Text;
  32 
  33 import org.xml.sax.ContentHandler;
  34 import org.xml.sax.Locator;
  35 import org.xml.sax.ext.LexicalHandler;
  36 import org.xml.sax.helpers.LocatorImpl;
  37 
  38 /**
  39  * This class does a pre-order walk of the DOM tree, calling a ContentHandler
  40  * interface as it goes.
  41  * @xsl.usage advanced
  42  */
  43 
  44 public class TreeWalker
  45 {
  46 
  47   /** Local reference to a ContentHandler          */
  48   private ContentHandler m_contentHandler = null;
  49 
  50   // ARGHH!!  JAXP Uses Xerces without setting the namespace processing to ON!
  51   // DOM2Helper m_dh = new DOM2Helper();
  52 
  53   /** DomHelper for this TreeWalker          */
  54   protected DOMHelper m_dh;
  55 
  56         /** Locator object for this TreeWalker          */
  57         private LocatorImpl m_locator = new LocatorImpl();
  58 
  59   /**
  60    * Get the ContentHandler used for the tree walk.
  61    *
  62    * @return the ContentHandler used for the tree walk
  63    */
  64   public ContentHandler getContentHandler()
  65   {
  66     return m_contentHandler;
  67   }
  68 
  69   /**
  70    * Get the ContentHandler used for the tree walk.
  71    *
  72    * @return the ContentHandler used for the tree walk
  73    */
  74   public void setContentHandler(ContentHandler ch)
  75   {
  76     m_contentHandler = ch;
  77   }
  78 
  79         /**
  80    * Constructor.
  81    * @param   contentHandler The implementation of the
  82    * @param   systemId System identifier for the document.
  83    * contentHandler operation (toXMLString, digest, ...)
  84    */
  85   public TreeWalker(ContentHandler contentHandler, DOMHelper dh, String systemId)
  86   {
  87     this.m_contentHandler = contentHandler;
  88     m_contentHandler.setDocumentLocator(m_locator);
  89     if (systemId != null) {
  90         m_locator.setSystemId(systemId);
  91     }
  92     m_dh = dh;
  93   }
  94 
  95   /**
  96    * Constructor.
  97    * @param   contentHandler The implementation of the
  98    * contentHandler operation (toXMLString, digest, ...)
  99    */
 100   public TreeWalker(ContentHandler contentHandler, DOMHelper dh)
 101   {
 102     this.m_contentHandler = contentHandler;
 103     m_contentHandler.setDocumentLocator(m_locator);
 104     m_dh = dh;
 105   }
 106 
 107   /**
 108    * Constructor.
 109    * @param   contentHandler The implementation of the
 110    * contentHandler operation (toXMLString, digest, ...)
 111    */
 112   public TreeWalker(ContentHandler contentHandler)
 113   {
 114     this.m_contentHandler = contentHandler;
 115     if (m_contentHandler != null) {
 116         m_contentHandler.setDocumentLocator(m_locator);
 117     }
 118     m_dh = new DOM2Helper();
 119   }
 120 
 121   /**
 122    * Perform a pre-order traversal non-recursive style.
 123    *
 124    * Note that TreeWalker assumes that the subtree is intended to represent
 125    * a complete (though not necessarily well-formed) document and, during a
 126    * traversal, startDocument and endDocument will always be issued to the
 127    * SAX listener.
 128    *
 129    * @param pos Node in the tree where to start traversal
 130    *
 131    * @throws TransformerException
 132    */
 133   public void traverse(Node pos) throws org.xml.sax.SAXException
 134   {
 135         this.m_contentHandler.startDocument();
 136 
 137         traverseFragment(pos);
 138 
 139         this.m_contentHandler.endDocument();
 140   }
 141 
 142   /**
 143    * Perform a pre-order traversal non-recursive style.
 144    *
 145    * In contrast to the traverse() method this method will not issue
 146    * startDocument() and endDocument() events to the SAX listener.
 147    *
 148    * @param pos Node in the tree where to start traversal
 149    *
 150    * @throws TransformerException
 151    */
 152   public void traverseFragment(Node pos) throws org.xml.sax.SAXException
 153   {
 154     Node top = pos;
 155 
 156     while (null != pos)
 157     {
 158       startNode(pos);
 159 
 160       Node nextNode = pos.getFirstChild();
 161 
 162       while (null == nextNode)
 163       {
 164         endNode(pos);
 165 
 166         if (top.equals(pos))
 167           break;
 168 
 169         nextNode = pos.getNextSibling();
 170 
 171         if (null == nextNode)
 172         {
 173           pos = pos.getParentNode();
 174 
 175           if ((null == pos) || (top.equals(pos)))
 176           {
 177             if (null != pos)
 178               endNode(pos);
 179 
 180             nextNode = null;
 181 
 182             break;
 183           }
 184         }
 185       }
 186 
 187       pos = nextNode;
 188     }
 189   }
 190 
 191   /**
 192    * Perform a pre-order traversal non-recursive style.
 193 
 194    * Note that TreeWalker assumes that the subtree is intended to represent
 195    * a complete (though not necessarily well-formed) document and, during a
 196    * traversal, startDocument and endDocument will always be issued to the
 197    * SAX listener.
 198    *
 199    * @param pos Node in the tree where to start traversal
 200    * @param top Node in the tree where to end traversal
 201    *
 202    * @throws TransformerException
 203    */
 204   public void traverse(Node pos, Node top) throws org.xml.sax.SAXException
 205   {
 206 
 207         this.m_contentHandler.startDocument();
 208 
 209     while (null != pos)
 210     {
 211       startNode(pos);
 212 
 213       Node nextNode = pos.getFirstChild();
 214 
 215       while (null == nextNode)
 216       {
 217         endNode(pos);
 218 
 219         if ((null != top) && top.equals(pos))
 220           break;
 221 
 222         nextNode = pos.getNextSibling();
 223 
 224         if (null == nextNode)
 225         {
 226           pos = pos.getParentNode();
 227 
 228           if ((null == pos) || ((null != top) && top.equals(pos)))
 229           {
 230             nextNode = null;
 231 
 232             break;
 233           }
 234         }
 235       }
 236 
 237       pos = nextNode;
 238     }
 239     this.m_contentHandler.endDocument();
 240   }
 241 
 242   /** Flag indicating whether following text to be processed is raw text          */
 243   boolean nextIsRaw = false;
 244 
 245   /**
 246    * Optimized dispatch of characters.
 247    */
 248   private final void dispatachChars(Node node)
 249      throws org.xml.sax.SAXException
 250   {
 251     if(m_contentHandler instanceof com.sun.org.apache.xml.internal.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)
 252     {
 253       ((com.sun.org.apache.xml.internal.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)m_contentHandler).characters(node);
 254     }
 255     else
 256     {
 257       String data = ((Text) node).getData();
 258       this.m_contentHandler.characters(data.toCharArray(), 0, data.length());
 259     }
 260   }
 261 
 262   /**
 263    * Start processing given node
 264    *
 265    *
 266    * @param node Node to process
 267    *
 268    * @throws org.xml.sax.SAXException
 269    */
 270   protected void startNode(Node node) throws org.xml.sax.SAXException
 271   {
 272 
 273     if (m_contentHandler instanceof NodeConsumer)
 274     {
 275       ((NodeConsumer) m_contentHandler).setOriginatingNode(node);
 276     }
 277 
 278                 if (node instanceof Locator)
 279                 {
 280                         Locator loc = (Locator)node;
 281                         m_locator.setColumnNumber(loc.getColumnNumber());
 282                         m_locator.setLineNumber(loc.getLineNumber());
 283                         m_locator.setPublicId(loc.getPublicId());
 284                         m_locator.setSystemId(loc.getSystemId());
 285                 }
 286                 else
 287                 {
 288                         m_locator.setColumnNumber(0);
 289       m_locator.setLineNumber(0);
 290                 }
 291 
 292     switch (node.getNodeType())
 293     {
 294     case Node.COMMENT_NODE :
 295     {
 296       String data = ((Comment) node).getData();
 297 
 298       if (m_contentHandler instanceof LexicalHandler)
 299       {
 300         LexicalHandler lh = ((LexicalHandler) this.m_contentHandler);
 301 
 302         lh.comment(data.toCharArray(), 0, data.length());
 303       }
 304     }
 305     break;
 306     case Node.DOCUMENT_FRAGMENT_NODE :
 307 
 308       // ??;
 309       break;
 310     case Node.DOCUMENT_NODE :
 311 
 312       break;
 313     case Node.ELEMENT_NODE :
 314       NamedNodeMap atts = ((Element) node).getAttributes();
 315       int nAttrs = atts.getLength();
 316       // System.out.println("TreeWalker#startNode: "+node.getNodeName());
 317 
 318       for (int i = 0; i < nAttrs; i++)
 319       {
 320         Node attr = atts.item(i);
 321         String attrName = attr.getNodeName();
 322 
 323         // System.out.println("TreeWalker#startNode: attr["+i+"] = "+attrName+", "+attr.getNodeValue());
 324         if (attrName.equals("xmlns") || attrName.startsWith("xmlns:"))
 325         {
 326           // System.out.println("TreeWalker#startNode: attr["+i+"] = "+attrName+", "+attr.getNodeValue());
 327           int index;
 328           // Use "" instead of null, as Xerces likes "" for the
 329           // name of the default namespace.  Fix attributed
 330           // to "Steven Murray" <smurray@ebt.com>.
 331           String prefix = (index = attrName.indexOf(":")) < 0
 332                           ? "" : attrName.substring(index + 1);
 333 
 334           this.m_contentHandler.startPrefixMapping(prefix,
 335                                                    attr.getNodeValue());
 336         }
 337 
 338       }
 339 
 340       // System.out.println("m_dh.getNamespaceOfNode(node): "+m_dh.getNamespaceOfNode(node));
 341       // System.out.println("m_dh.getLocalNameOfNode(node): "+m_dh.getLocalNameOfNode(node));
 342       String ns = m_dh.getNamespaceOfNode(node);
 343       if(null == ns)
 344         ns = "";
 345       this.m_contentHandler.startElement(ns,
 346                                          m_dh.getLocalNameOfNode(node),
 347                                          node.getNodeName(),
 348                                          new AttList(atts, m_dh));
 349       break;
 350     case Node.PROCESSING_INSTRUCTION_NODE :
 351     {
 352       ProcessingInstruction pi = (ProcessingInstruction) node;
 353       String name = pi.getNodeName();
 354 
 355       // String data = pi.getData();
 356       if (name.equals("xslt-next-is-raw"))
 357       {
 358         nextIsRaw = true;
 359       }
 360       else
 361       {
 362         this.m_contentHandler.processingInstruction(pi.getNodeName(),
 363                                                     pi.getData());
 364       }
 365     }
 366     break;
 367     case Node.CDATA_SECTION_NODE :
 368     {
 369       boolean isLexH = (m_contentHandler instanceof LexicalHandler);
 370       LexicalHandler lh = isLexH
 371                           ? ((LexicalHandler) this.m_contentHandler) : null;
 372 
 373       if (isLexH)
 374       {
 375         lh.startCDATA();
 376       }
 377 
 378       dispatachChars(node);
 379 
 380       {
 381         if (isLexH)
 382         {
 383           lh.endCDATA();
 384         }
 385       }
 386     }
 387     break;
 388     case Node.TEXT_NODE :
 389     {
 390       //String data = ((Text) node).getData();
 391 
 392       if (nextIsRaw)
 393       {
 394         nextIsRaw = false;
 395 
 396         m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_DISABLE_OUTPUT_ESCAPING, "");
 397         dispatachChars(node);
 398         m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_ENABLE_OUTPUT_ESCAPING, "");
 399       }
 400       else
 401       {
 402         dispatachChars(node);
 403       }
 404     }
 405     break;
 406     case Node.ENTITY_REFERENCE_NODE :
 407     {
 408       EntityReference eref = (EntityReference) node;
 409 
 410       if (m_contentHandler instanceof LexicalHandler)
 411       {
 412         ((LexicalHandler) this.m_contentHandler).startEntity(
 413           eref.getNodeName());
 414       }
 415       else
 416       {
 417 
 418         // warning("Can not output entity to a pure SAX ContentHandler");
 419       }
 420     }
 421     break;
 422     default :
 423     }
 424   }
 425 
 426   /**
 427    * End processing of given node
 428    *
 429    *
 430    * @param node Node we just finished processing
 431    *
 432    * @throws org.xml.sax.SAXException
 433    */
 434   protected void endNode(Node node) throws org.xml.sax.SAXException
 435   {
 436 
 437     switch (node.getNodeType())
 438     {
 439     case Node.DOCUMENT_NODE :
 440       break;
 441 
 442     case Node.ELEMENT_NODE :
 443       String ns = m_dh.getNamespaceOfNode(node);
 444       if(null == ns)
 445         ns = "";
 446       this.m_contentHandler.endElement(ns,
 447                                          m_dh.getLocalNameOfNode(node),
 448                                          node.getNodeName());
 449 
 450       NamedNodeMap atts = ((Element) node).getAttributes();
 451       int nAttrs = atts.getLength();
 452 
 453       for (int i = 0; i < nAttrs; i++)
 454       {
 455         Node attr = atts.item(i);
 456         String attrName = attr.getNodeName();
 457 
 458         if (attrName.equals("xmlns") || attrName.startsWith("xmlns:"))
 459         {
 460           int index;
 461           // Use "" instead of null, as Xerces likes "" for the
 462           // name of the default namespace.  Fix attributed
 463           // to "Steven Murray" <smurray@ebt.com>.
 464           String prefix = (index = attrName.indexOf(":")) < 0
 465                           ? "" : attrName.substring(index + 1);
 466 
 467           this.m_contentHandler.endPrefixMapping(prefix);
 468         }
 469       }
 470       break;
 471     case Node.CDATA_SECTION_NODE :
 472       break;
 473     case Node.ENTITY_REFERENCE_NODE :
 474     {
 475       EntityReference eref = (EntityReference) node;
 476 
 477       if (m_contentHandler instanceof LexicalHandler)
 478       {
 479         LexicalHandler lh = ((LexicalHandler) this.m_contentHandler);
 480 
 481         lh.endEntity(eref.getNodeName());
 482       }
 483     }
 484     break;
 485     default :
 486     }
 487   }
 488 }  //TreeWalker