1 /*
   2  * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xml.internal.utils;
  22 
  23 import com.sun.org.apache.xml.internal.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler;
  24 import javax.xml.transform.Result;
  25 import org.w3c.dom.Comment;
  26 import org.w3c.dom.Element;
  27 import org.w3c.dom.EntityReference;
  28 import org.w3c.dom.NamedNodeMap;
  29 import org.w3c.dom.Node;
  30 import org.w3c.dom.ProcessingInstruction;
  31 import org.w3c.dom.Text;
  32 
  33 import org.xml.sax.ContentHandler;
  34 import org.xml.sax.Locator;
  35 import org.xml.sax.ext.LexicalHandler;
  36 import org.xml.sax.helpers.LocatorImpl;
  37 
  38 /**
  39  * This class does a pre-order walk of the DOM tree, calling a ContentHandler
  40  * interface as it goes.
  41  * @xsl.usage advanced
  42  */
  43 
  44 public class TreeWalker
  45 {
  46 
  47   /** Local reference to a ContentHandler          */
  48   private ContentHandler m_contentHandler = null;
  49 
  50    /** Locator object for this TreeWalker          */
  51    private LocatorImpl m_locator = new LocatorImpl();
  52 
  53   /**
  54    * Get the ContentHandler used for the tree walk.
  55    *
  56    * @return the ContentHandler used for the tree walk
  57    */
  58   public ContentHandler getContentHandler()
  59   {
  60     return m_contentHandler;
  61   }
  62 
  63   /**
  64    * Get the ContentHandler used for the tree walk.
  65    *
  66    * @return the ContentHandler used for the tree walk
  67    */
  68   public void setContentHandler(ContentHandler ch)
  69   {
  70     m_contentHandler = ch;
  71   }
  72 
  73    /**
  74    * Constructor.
  75    * @param   contentHandler The implementation of the
  76    * @param   systemId System identifier for the document.
  77    * contentHandler operation (toXMLString, digest, ...)
  78    */
  79   public TreeWalker(ContentHandler contentHandler, String systemId)
  80   {
  81     this.m_contentHandler = contentHandler;
  82     if (m_contentHandler != null) {
  83         m_contentHandler.setDocumentLocator(m_locator);
  84     }
  85     if (systemId != null) {
  86         m_locator.setSystemId(systemId);
  87     }
  88   }
  89 
  90   /**
  91    * Constructor.
  92    * @param   contentHandler The implementation of the
  93    * contentHandler operation (toXMLString, digest, ...)
  94    */
  95   public TreeWalker(ContentHandler contentHandler)
  96   {
  97       this(contentHandler, null);
  98   }
  99 
 100   /**
 101    * Perform a pre-order traversal non-recursive style.
 102    *
 103    * Note that TreeWalker assumes that the subtree is intended to represent
 104    * a complete (though not necessarily well-formed) document and, during a
 105    * traversal, startDocument and endDocument will always be issued to the
 106    * SAX listener.
 107    *
 108    * @param pos Node in the tree where to start traversal
 109    *
 110    * @throws TransformerException
 111    */
 112   public void traverse(Node pos) throws org.xml.sax.SAXException
 113   {
 114         this.m_contentHandler.startDocument();
 115 
 116         traverseFragment(pos);
 117 
 118         this.m_contentHandler.endDocument();
 119   }
 120 
 121   /**
 122    * Perform a pre-order traversal non-recursive style.
 123    *
 124    * In contrast to the traverse() method this method will not issue
 125    * startDocument() and endDocument() events to the SAX listener.
 126    *
 127    * @param pos Node in the tree where to start traversal
 128    *
 129    * @throws TransformerException
 130    */
 131   public void traverseFragment(Node pos) throws org.xml.sax.SAXException
 132   {
 133     Node top = pos;
 134 
 135     while (null != pos)
 136     {
 137       startNode(pos);
 138 
 139       Node nextNode = pos.getFirstChild();
 140 
 141       while (null == nextNode)
 142       {
 143         endNode(pos);
 144 
 145         if (top.equals(pos))
 146           break;
 147 
 148         nextNode = pos.getNextSibling();
 149 
 150         if (null == nextNode)
 151         {
 152           pos = pos.getParentNode();
 153 
 154           if ((null == pos) || (top.equals(pos)))
 155           {
 156             if (null != pos)
 157               endNode(pos);
 158 
 159             nextNode = null;
 160 
 161             break;
 162           }
 163         }
 164       }
 165 
 166       pos = nextNode;
 167     }
 168   }
 169 
 170   /**
 171    * Perform a pre-order traversal non-recursive style.
 172 
 173    * Note that TreeWalker assumes that the subtree is intended to represent
 174    * a complete (though not necessarily well-formed) document and, during a
 175    * traversal, startDocument and endDocument will always be issued to the
 176    * SAX listener.
 177    *
 178    * @param pos Node in the tree where to start traversal
 179    * @param top Node in the tree where to end traversal
 180    *
 181    * @throws TransformerException
 182    */
 183   public void traverse(Node pos, Node top) throws org.xml.sax.SAXException
 184   {
 185 
 186         this.m_contentHandler.startDocument();
 187 
 188     while (null != pos)
 189     {
 190       startNode(pos);
 191 
 192       Node nextNode = pos.getFirstChild();
 193 
 194       while (null == nextNode)
 195       {
 196         endNode(pos);
 197 
 198         if ((null != top) && top.equals(pos))
 199           break;
 200 
 201         nextNode = pos.getNextSibling();
 202 
 203         if (null == nextNode)
 204         {
 205           pos = pos.getParentNode();
 206 
 207           if ((null == pos) || ((null != top) && top.equals(pos)))
 208           {
 209             nextNode = null;
 210 
 211             break;
 212           }
 213         }
 214       }
 215 
 216       pos = nextNode;
 217     }
 218     this.m_contentHandler.endDocument();
 219   }
 220 
 221   // Flag indicating whether following text to be processed is raw text
 222   boolean nextIsRaw = false;
 223 
 224   /**
 225    * Optimized dispatch of characters.
 226    */
 227   private final void dispatachChars(Node node)
 228      throws org.xml.sax.SAXException
 229   {
 230     if(m_contentHandler instanceof CharacterNodeHandler)
 231     {
 232       ((CharacterNodeHandler)m_contentHandler).characters(node);
 233     }
 234     else
 235     {
 236       String data = ((Text) node).getData();
 237       this.m_contentHandler.characters(data.toCharArray(), 0, data.length());
 238     }
 239   }
 240 
 241   /**
 242    * Start processing given node
 243    *
 244    *
 245    * @param node Node to process
 246    *
 247    * @throws org.xml.sax.SAXException
 248    */
 249   protected void startNode(Node node) throws org.xml.sax.SAXException
 250   {
 251 
 252     if (m_contentHandler instanceof NodeConsumer)
 253     {
 254       ((NodeConsumer) m_contentHandler).setOriginatingNode(node);
 255     }
 256 
 257                 if (node instanceof Locator)
 258                 {
 259                         Locator loc = (Locator)node;
 260                         m_locator.setColumnNumber(loc.getColumnNumber());
 261                         m_locator.setLineNumber(loc.getLineNumber());
 262                         m_locator.setPublicId(loc.getPublicId());
 263                         m_locator.setSystemId(loc.getSystemId());
 264                 }
 265                 else
 266                 {
 267                         m_locator.setColumnNumber(0);
 268       m_locator.setLineNumber(0);
 269                 }
 270 
 271     switch (node.getNodeType())
 272     {
 273     case Node.COMMENT_NODE :
 274     {
 275       String data = ((Comment) node).getData();
 276 
 277       if (m_contentHandler instanceof LexicalHandler)
 278       {
 279         LexicalHandler lh = ((LexicalHandler) this.m_contentHandler);
 280 
 281         lh.comment(data.toCharArray(), 0, data.length());
 282       }
 283     }
 284     break;
 285     case Node.DOCUMENT_FRAGMENT_NODE :
 286 
 287       // ??;
 288       break;
 289     case Node.DOCUMENT_NODE :
 290 
 291       break;
 292     case Node.ELEMENT_NODE :
 293       NamedNodeMap atts = ((Element) node).getAttributes();
 294       int nAttrs = atts.getLength();
 295 
 296       for (int i = 0; i < nAttrs; i++)
 297       {
 298         Node attr = atts.item(i);
 299         String attrName = attr.getNodeName();
 300 
 301         if (attrName.equals("xmlns") || attrName.startsWith("xmlns:"))
 302         {
 303           int index;
 304           // Use "" instead of null, as Xerces likes "" for the
 305           // name of the default namespace.  Fix attributed
 306           // to "Steven Murray" <smurray@ebt.com>.
 307           String prefix = (index = attrName.indexOf(":")) < 0
 308                           ? "" : attrName.substring(index + 1);
 309 
 310           this.m_contentHandler.startPrefixMapping(prefix,
 311                                                    attr.getNodeValue());
 312         }
 313 
 314       }
 315 
 316       String ns = DOM2Helper.getNamespaceOfNode(node);
 317       if(null == ns)
 318         ns = "";
 319       this.m_contentHandler.startElement(ns,
 320                                          DOM2Helper.getLocalNameOfNode(node),
 321                                          node.getNodeName(),
 322                                          new AttList(atts));
 323       break;
 324     case Node.PROCESSING_INSTRUCTION_NODE :
 325     {
 326       ProcessingInstruction pi = (ProcessingInstruction) node;
 327       String name = pi.getNodeName();
 328 
 329       // String data = pi.getData();
 330       if (name.equals("xslt-next-is-raw"))
 331       {
 332         nextIsRaw = true;
 333       }
 334       else
 335       {
 336         this.m_contentHandler.processingInstruction(pi.getNodeName(),
 337                                                     pi.getData());
 338       }
 339     }
 340     break;
 341     case Node.CDATA_SECTION_NODE :
 342     {
 343       boolean isLexH = (m_contentHandler instanceof LexicalHandler);
 344       LexicalHandler lh = isLexH
 345                           ? ((LexicalHandler) this.m_contentHandler) : null;
 346 
 347       if (isLexH)
 348       {
 349         lh.startCDATA();
 350       }
 351 
 352       dispatachChars(node);
 353 
 354       {
 355         if (isLexH)
 356         {
 357           lh.endCDATA();
 358         }
 359       }
 360     }
 361     break;
 362     case Node.TEXT_NODE :
 363     {
 364       //String data = ((Text) node).getData();
 365 
 366       if (nextIsRaw)
 367       {
 368         nextIsRaw = false;
 369 
 370         m_contentHandler.processingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "");
 371         dispatachChars(node);
 372         m_contentHandler.processingInstruction(Result.PI_ENABLE_OUTPUT_ESCAPING, "");
 373       }
 374       else
 375       {
 376         dispatachChars(node);
 377       }
 378     }
 379     break;
 380     case Node.ENTITY_REFERENCE_NODE :
 381     {
 382       EntityReference eref = (EntityReference) node;
 383 
 384       if (m_contentHandler instanceof LexicalHandler)
 385       {
 386         ((LexicalHandler) this.m_contentHandler).startEntity(
 387           eref.getNodeName());
 388       }
 389       else
 390       {
 391 
 392         // warning("Can not output entity to a pure SAX ContentHandler");
 393       }
 394     }
 395     break;
 396     default :
 397     }
 398   }
 399 
 400   /**
 401    * End processing of given node
 402    *
 403    *
 404    * @param node Node we just finished processing
 405    *
 406    * @throws org.xml.sax.SAXException
 407    */
 408   protected void endNode(Node node) throws org.xml.sax.SAXException
 409   {
 410 
 411     switch (node.getNodeType())
 412     {
 413     case Node.DOCUMENT_NODE :
 414       break;
 415 
 416     case Node.ELEMENT_NODE :
 417       String ns = DOM2Helper.getNamespaceOfNode(node);
 418       if(null == ns)
 419         ns = "";
 420       this.m_contentHandler.endElement(ns,
 421               DOM2Helper.getLocalNameOfNode(node),
 422               node.getNodeName());
 423 
 424       NamedNodeMap atts = ((Element) node).getAttributes();
 425       int nAttrs = atts.getLength();
 426 
 427       for (int i = 0; i < nAttrs; i++)
 428       {
 429         Node attr = atts.item(i);
 430         String attrName = attr.getNodeName();
 431 
 432         if (attrName.equals("xmlns") || attrName.startsWith("xmlns:"))
 433         {
 434           int index;
 435           // Use "" instead of null, as Xerces likes "" for the
 436           // name of the default namespace.  Fix attributed
 437           // to "Steven Murray" <smurray@ebt.com>.
 438           String prefix = (index = attrName.indexOf(":")) < 0
 439                           ? "" : attrName.substring(index + 1);
 440 
 441           this.m_contentHandler.endPrefixMapping(prefix);
 442         }
 443       }
 444       break;
 445     case Node.CDATA_SECTION_NODE :
 446       break;
 447     case Node.ENTITY_REFERENCE_NODE :
 448     {
 449       EntityReference eref = (EntityReference) node;
 450 
 451       if (m_contentHandler instanceof LexicalHandler)
 452       {
 453         LexicalHandler lh = ((LexicalHandler) this.m_contentHandler);
 454 
 455         lh.endEntity(eref.getNodeName());
 456       }
 457     }
 458     break;
 459     default :
 460     }
 461   }
 462 }  //TreeWalker