1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * @LastModified: Oct 2017
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm;
  23 
  24 import com.sun.org.apache.xml.internal.dtm.DTM;
  25 import com.sun.org.apache.xml.internal.dtm.DTMManager;
  26 import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
  27 import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators;
  28 import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault;
  29 import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable;
  30 import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource;
  31 import com.sun.org.apache.xml.internal.res.XMLErrorResources;
  32 import com.sun.org.apache.xml.internal.res.XMLMessages;
  33 import com.sun.org.apache.xml.internal.utils.FastStringBuffer;
  34 import com.sun.org.apache.xml.internal.utils.QName;
  35 import com.sun.org.apache.xml.internal.utils.StringBufferPool;
  36 import com.sun.org.apache.xml.internal.utils.TreeWalker;
  37 import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer;
  38 import com.sun.org.apache.xml.internal.utils.XMLString;
  39 import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
  40 import java.util.ArrayList;
  41 import java.util.List;
  42 import javax.xml.transform.SourceLocator;
  43 import javax.xml.transform.dom.DOMSource;
  44 import org.w3c.dom.Attr;
  45 import org.w3c.dom.Document;
  46 import org.w3c.dom.DocumentType;
  47 import org.w3c.dom.Element;
  48 import org.w3c.dom.Entity;
  49 import org.w3c.dom.NamedNodeMap;
  50 import org.w3c.dom.Node;
  51 import org.xml.sax.ContentHandler;
  52 
  53 /** The <code>DOM2DTM</code> class serves up a DOM's contents via the
  54  * DTM API.
  55  *
  56  * Note that it doesn't necessarily represent a full Document
  57  * tree. You can wrap a DOM2DTM around a specific node and its subtree
  58  * and the right things should happen. (I don't _think_ we currently
 * support DocumentFragment nodes as roots, though that might be worth
  60  * considering.)
  61  *
  62  * Note too that we do not currently attempt to track document
  63  * mutation. If you alter the DOM after wrapping DOM2DTM around it,
  64  * all bets are off.
  65  * */
  66 public class DOM2DTM extends DTMDefaultBaseIterators
  67 {
  /** Compile-time debugging switch. Code guarded by this flag (diagnostic
   * printing in nextNode()) is inactive while it is false. */
  static final boolean JJK_DEBUG=false;
  static final boolean JJK_NEWCODE=true;

  /** Manifest constant: the namespace URI passed, together with the
   * prefix "xml", when nextNode() synthesizes the implied
   * <code>xml:</code> namespace declaration node.
   */
  static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";

  /** The current position in the DOM tree. Last node examined for
   * possible copying to DTM. */
  transient private Node m_pos;
  /** The current position in the DTM tree. Who children get appended to.
   * (The constructor resets this to NULL before the root is added.) */
  private int m_last_parent=0;
  /** The current position in the DTM tree. Who children reference as their
   * previous sib. */
  private int m_last_kid=NULL;

  /** The top of the subtree.
   * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
   * */
  transient private Node m_root;

  /** True iff the first element has been processed. This is used to control
      synthesis of the implied xml: namespace declaration node. */
  boolean m_processedFirstElement=false;

  /** true if ALL the nodes in the m_root subtree have been processed;
   * false if our incremental build has not yet finished scanning the
   * DOM tree.  */
  transient private boolean m_nodesAreProcessed;

  /** The node objects.  The instance part of the handle indexes
   * directly into this vector.  Each DTM node may actually be
   * composed of several DOM nodes (for example, if logically-adjacent
   * Text/CDATASection nodes in the DOM have been coalesced into a
   * single DTM Text node); this table points only to the first in
   * that sequence. */
  protected List<Node> m_nodes = new ArrayList<>();
 105 
 106   /**
 107    * Construct a DOM2DTM object from a DOM node.
 108    *
 109    * @param mgr The DTMManager who owns this DTM.
 110    * @param domSource the DOM source that this DTM will wrap.
 111    * @param dtmIdentity The DTM identity ID for this DTM.
 112    * @param whiteSpaceFilter The white space filter for this DTM, which may
 113    *                         be null.
 114    * @param xstringfactory XMLString factory for creating character content.
 115    * @param doIndexing true if the caller considers it worth it to use
 116    *                   indexing schemes.
 117    */
 118   public DOM2DTM(DTMManager mgr, DOMSource domSource,
 119                  int dtmIdentity, DTMWSFilter whiteSpaceFilter,
 120                  XMLStringFactory xstringfactory,
 121                  boolean doIndexing)
 122   {
 123     super(mgr, domSource, dtmIdentity, whiteSpaceFilter,
 124           xstringfactory, doIndexing);
 125 
 126     // Initialize DOM navigation
 127     m_pos=m_root = domSource.getNode();
 128     // Initialize DTM navigation
 129     m_last_parent=m_last_kid=NULL;
 130     m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
 131 
 132     // Apparently the domSource root may not actually be the
 133     // Document node. If it's an Element node, we need to immediately
 134     // add its attributes. Adapted from nextNode().
 135     // %REVIEW% Move this logic into addNode and recurse? Cleaner!
 136     //
 137     // (If it's an EntityReference node, we're probably scrod. For now
 138     // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
 139                 //
 140                 // %ISSUE% What about inherited namespaces in this case?
 141                 // Do we need to special-case initialize them into the DTM model?
 142     if(ELEMENT_NODE == m_root.getNodeType())
 143     {
 144       NamedNodeMap attrs=m_root.getAttributes();
 145       int attrsize=(attrs==null) ? 0 : attrs.getLength();
 146       if(attrsize>0)
 147       {
 148         int attrIndex=NULL; // start with no previous sib
 149         for(int i=0;i<attrsize;++i)
 150         {
 151           // No need to force nodetype in this case;
 152           // addNode() will take care of switching it from
 153           // Attr to Namespace if necessary.
 154           attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
 155           m_firstch.setElementAt(DTM.NULL,attrIndex);
 156         }
 157         // Terminate list of attrs, and make sure they aren't
 158         // considered children of the element
 159         m_nextsib.setElementAt(DTM.NULL,attrIndex);
 160 
 161         // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
 162       } // if attrs exist
 163     } //if(ELEMENT_NODE)
 164 
 165     // Initialize DTM-completed status
 166     m_nodesAreProcessed = false;
 167   }
 168 
 169   /**
 170    * Construct the node map from the node.
 171    *
 172    * @param node The node that is to be added to the DTM.
 173    * @param parentIndex The current parent index.
 174    * @param previousSibling The previous sibling index.
 175    * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
 176    *    Used to force nodes to Text rather than CDATASection when their
 177    *    coalesced value includes ordinary Text nodes (current DTM behavior).
 178    *
 179    * @return The index identity of the node that was added.
 180    */
 181   protected int addNode(Node node, int parentIndex,
 182                         int previousSibling, int forceNodeType)
 183   {
 184     int nodeIndex = m_nodes.size();
 185 
 186     // Have we overflowed a DTM Identity's addressing range?
 187     if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
 188     {
 189       try
 190       {
 191         if(m_mgr==null)
 192           throw new ClassCastException();
 193 
 194                                 // Handle as Extended Addressing
 195         DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
 196         int id=mgrD.getFirstFreeDTMID();
 197         mgrD.addDTM(this,id,nodeIndex);
 198         m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
 199       }
 200       catch(ClassCastException e)
 201       {
 202         // %REVIEW% Wrong error message, but I've been told we're trying
 203         // not to add messages right not for I18N reasons.
 204         // %REVIEW% Should this be a Fatal Error?
 205         error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
 206       }
 207     }
 208 
 209     m_size++;
 210     // ensureSize(nodeIndex);
 211 
 212     int type;
 213     if(NULL==forceNodeType)
 214         type = node.getNodeType();
 215     else
 216         type=forceNodeType;
 217 
 218     // %REVIEW% The Namespace Spec currently says that Namespaces are
 219     // processed in a non-namespace-aware manner, by matching the
 220     // QName, even though there is in fact a namespace assigned to
 221     // these nodes in the DOM. If and when that changes, we will have
 222     // to consider whether we check the namespace-for-namespaces
 223     // rather than the node name.
 224     //
 225     // %TBD% Note that the DOM does not necessarily explicitly declare
 226     // all the namespaces it uses. DOM Level 3 will introduce a
 227     // namespace-normalization operation which reconciles that, and we
 228     // can request that users invoke it or otherwise ensure that the
 229     // tree is namespace-well-formed before passing the DOM to Xalan.
 230     // But if they don't, what should we do about it? We probably
 231     // don't want to alter the source DOM (and may not be able to do
 232     // so if it's read-only). The best available answer might be to
 233     // synthesize additional DTM Namespace Nodes that don't correspond
 234     // to DOM Attr Nodes.
 235     if (Node.ATTRIBUTE_NODE == type)
 236     {
 237       String name = node.getNodeName();
 238 
 239       if (name.startsWith("xmlns:") || name.equals("xmlns"))
 240       {
 241         type = DTM.NAMESPACE_NODE;
 242       }
 243     }
 244 
 245     m_nodes.add(node);
 246 
 247     m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
 248     m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
 249     m_prevsib.setElementAt(previousSibling,nodeIndex);
 250     m_parent.setElementAt(parentIndex,nodeIndex);
 251 
 252     if(DTM.NULL != parentIndex &&
 253        type != DTM.ATTRIBUTE_NODE &&
 254        type != DTM.NAMESPACE_NODE)
 255     {
 256       // If the DTM parent had no children, this becomes its first child.
 257       if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
 258         m_firstch.setElementAt(nodeIndex,parentIndex);
 259     }
 260 
 261     String nsURI = node.getNamespaceURI();
 262 
 263     // Deal with the difference between Namespace spec and XSLT
 264     // definitions of local name. (The former says PIs don't have
 265     // localnames; the latter says they do.)
 266     String localName =  (type == Node.PROCESSING_INSTRUCTION_NODE) ?
 267                          node.getNodeName() :
 268                          node.getLocalName();
 269 
 270     // Hack to make DOM1 sort of work...
 271     if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
 272         && null == localName)
 273       localName = node.getNodeName(); // -sb
 274 
 275     ExpandedNameTable exnt = m_expandedNameTable;
 276 
 277     // %TBD% Nodes created with the old non-namespace-aware DOM
 278     // calls createElement() and createAttribute() will never have a
 279     // localname. That will cause their expandedNameID to be just the
 280     // nodeType... which will keep them from being matched
 281     // successfully by name. Since the DOM makes no promise that
 282     // those will participate in namespace processing, this is
 283     // officially accepted as Not Our Fault. But it might be nice to
 284     // issue a diagnostic message!
 285     if(node.getLocalName()==null &&
 286        (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
 287       {
 288         // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
 289       }
 290 
 291     int expandedNameID = (null != localName)
 292        ? exnt.getExpandedTypeID(nsURI, localName, type) :
 293          exnt.getExpandedTypeID(type);
 294 
 295     m_exptype.setElementAt(expandedNameID,nodeIndex);
 296 
 297     indexNode(expandedNameID, nodeIndex);
 298 
 299     if (DTM.NULL != previousSibling)
 300       m_nextsib.setElementAt(nodeIndex,previousSibling);
 301 
 302     // This should be done after m_exptype has been set, and probably should
 303     // always be the last thing we do
 304     if (type == DTM.NAMESPACE_NODE)
 305         declareNamespaceInContext(parentIndex,nodeIndex);
 306 
 307     return nodeIndex;
 308   }
 309 
 310   /**
 311    * Get the number of nodes that have been added.
 312    */
 313   public int getNumberOfNodes()
 314   {
 315     return m_nodes.size();
 316   }
 317 
 318  /**
 319    * This method iterates to the next node that will be added to the table.
 320    * Each call to this method adds a new node to the table, unless the end
 321    * is reached, in which case it returns null.
 322    *
 323    * @return The true if a next node is found or false if
 324    *         there are no more nodes.
 325    */
 326   protected boolean nextNode()
 327   {
 328     // Non-recursive one-fetch-at-a-time depth-first traversal with
 329     // attribute/namespace nodes and white-space stripping.
 330     // Navigating the DOM is simple, navigating the DTM is simple;
 331     // keeping track of both at once is a trifle baroque but at least
 332     // we've avoided most of the special cases.
 333     if (m_nodesAreProcessed)
 334       return false;
 335 
 336     // %REVIEW% Is this local copy Really Useful from a performance
 337     // point of view?  Or is this a false microoptimization?
 338     Node pos=m_pos;
 339     Node next=null;
 340     int nexttype=NULL;
 341 
 342     // Navigate DOM tree
 343     do
 344       {
 345         // Look down to first child.
 346         if (pos.hasChildNodes())
 347           {
 348             next = pos.getFirstChild();
 349 
 350             // %REVIEW% There's probably a more elegant way to skip
 351             // the doctype. (Just let it go and Suppress it?
 352             if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
 353               next=next.getNextSibling();
 354 
 355             // Push DTM context -- except for children of Entity References,
 356             // which have no DTM equivalent and cause no DTM navigation.
 357             if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
 358               {
 359                 m_last_parent=m_last_kid;
 360                 m_last_kid=NULL;
 361                 // Whitespace-handler context stacking
 362                 if(null != m_wsfilter)
 363                 {
 364                   short wsv =
 365                     m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
 366                   boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
 367                     ? getShouldStripWhitespace()
 368                     : (DTMWSFilter.STRIP == wsv);
 369                   pushShouldStripWhitespace(shouldStrip);
 370                 } // if(m_wsfilter)
 371               }
 372           }
 373 
 374         // If that fails, look up and right (but not past root!)
 375         else
 376           {
 377             if(m_last_kid!=NULL)
 378               {
 379                 // Last node posted at this level had no more children
 380                 // If it has _no_ children, we need to record that.
 381                 if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
 382                   m_firstch.setElementAt(NULL,m_last_kid);
 383               }
 384 
 385             while(m_last_parent != NULL)
 386               {
 387                 // %REVIEW% There's probably a more elegant way to
 388                 // skip the doctype. (Just let it go and Suppress it?
 389                 next = pos.getNextSibling();
 390                 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
 391                   next=next.getNextSibling();
 392 
 393                 if(next!=null)
 394                   break; // Found it!
 395 
 396                 // No next-sibling found. Pop the DOM.
 397                 pos=pos.getParentNode();
 398                 if(pos==null)
 399                   {
 400                     // %TBD% Should never arise, but I want to be sure of that...
 401                     if(JJK_DEBUG)
 402                       {
 403                         System.out.println("***** DOM2DTM Pop Control Flow problem");
 404                         for(;;); // Freeze right here!
 405                       }
 406                   }
 407 
 408                 // The only parents in the DTM are Elements.  However,
 409                 // the DOM could contain EntityReferences.  If we
 410                 // encounter one, pop it _without_ popping DTM.
 411                 if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
 412                   {
 413                     // Nothing needs doing
 414                     if(JJK_DEBUG)
 415                       System.out.println("***** DOM2DTM popping EntRef");
 416                   }
 417                 else
 418                   {
 419                     popShouldStripWhitespace();
 420                     // Fix and pop DTM
 421                     if(m_last_kid==NULL)
 422                       m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
 423                     else
 424                       m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
 425                     m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
 426                   }
 427               }
 428             if(m_last_parent==NULL)
 429               next=null;
 430           }
 431 
 432         if(next!=null)
 433           nexttype=next.getNodeType();
 434 
 435         // If it's an entity ref, advance past it.
 436         //
 437         // %REVIEW% Should we let this out the door and just suppress it?
 438         // More work, but simpler code, more likely to be correct, and
 439         // it doesn't happen very often. We'd get rid of the loop too.
 440         if (ENTITY_REFERENCE_NODE == nexttype)
 441           pos=next;
 442       }
 443     while (ENTITY_REFERENCE_NODE == nexttype);
 444 
 445     // Did we run out of the tree?
 446     if(next==null)
 447       {
 448         m_nextsib.setElementAt(NULL,0);
 449         m_nodesAreProcessed = true;
 450         m_pos=null;
 451 
 452         if(JJK_DEBUG)
 453           {
 454             System.out.println("***** DOM2DTM Crosscheck:");
 455             for(int i=0;i<m_nodes.size();++i)
 456               System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
 457           }
 458 
 459         return false;
 460       }
 461 
 462     // Text needs some special handling:
 463     //
 464     // DTM may skip whitespace. This is handled by the suppressNode flag, which
 465     // when true will keep the DTM node from being created.
 466     //
 467     // DTM only directly records the first DOM node of any logically-contiguous
 468     // sequence. The lastTextNode value will be set to the last node in the
 469     // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
 470     // advance next over this whole block. Should be simpler than special-casing
 471     // the above loop for "Was the logically-preceeding sibling a text node".
 472     //
 473     // Finally, a DTM node should be considered a CDATASection only if all the
 474     // contiguous text it covers is CDATASections. The first Text should
 475     // force DTM to Text.
 476 
 477     boolean suppressNode=false;
 478     Node lastTextNode=null;
 479 
 480     nexttype=next.getNodeType();
 481 
 482     // nexttype=pos.getNodeType();
 483     if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
 484       {
 485         // If filtering, initially assume we're going to suppress the node
 486         suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
 487 
 488         // Scan logically contiguous text (siblings, plus "flattening"
 489         // of entity reference boundaries).
 490         Node n=next;
 491         while(n!=null)
 492           {
 493             lastTextNode=n;
 494             // Any Text node means DTM considers it all Text
 495             if(TEXT_NODE == n.getNodeType())
 496               nexttype=TEXT_NODE;
 497             // Any non-whitespace in this sequence blocks whitespace
 498             // suppression
 499             suppressNode &=
 500               XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
 501 
 502             n=logicalNextDOMTextNode(n);
 503           }
 504       }
 505 
 506     // Special handling for PIs: Some DOMs represent the XML
 507     // Declaration as a PI. This is officially incorrect, per the DOM
 508     // spec, but is considered a "wrong but tolerable" temporary
 509     // workaround pending proper handling of these fields in DOM Level
 510     // 3. We want to recognize and reject that case.
 511     else if(PROCESSING_INSTRUCTION_NODE==nexttype)
 512       {
 513         suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
 514       }
 515 
 516 
 517     if(!suppressNode)
 518       {
 519         // Inserting next. NOTE that we force the node type; for
 520         // coalesced Text, this records CDATASections adjacent to
 521         // ordinary Text as Text.
 522         int nextindex=addNode(next,m_last_parent,m_last_kid,
 523                               nexttype);
 524 
 525         m_last_kid=nextindex;
 526 
 527         if(ELEMENT_NODE == nexttype)
 528           {
 529             int attrIndex=NULL; // start with no previous sib
 530             // Process attributes _now_, rather than waiting.
 531             // Simpler control flow, makes NS cache available immediately.
 532             NamedNodeMap attrs=next.getAttributes();
 533             int attrsize=(attrs==null) ? 0 : attrs.getLength();
 534             if(attrsize>0)
 535               {
 536                 for(int i=0;i<attrsize;++i)
 537                   {
 538                     // No need to force nodetype in this case;
 539                     // addNode() will take care of switching it from
 540                     // Attr to Namespace if necessary.
 541                     attrIndex=addNode(attrs.item(i),
 542                                       nextindex,attrIndex,NULL);
 543                     m_firstch.setElementAt(DTM.NULL,attrIndex);
 544 
 545                     // If the xml: prefix is explicitly declared
 546                     // we don't need to synthesize one.
 547                     //
 548                     // NOTE that XML Namespaces were not originally
 549                     // defined as being namespace-aware (grrr), and
 550                     // while the W3C is planning to fix this it's
 551                     // safer for now to test the QName and trust the
 552                     // parsers to prevent anyone from redefining the
 553                     // reserved xmlns: prefix
 554                     if(!m_processedFirstElement
 555                        && "xmlns:xml".equals(attrs.item(i).getNodeName()))
 556                       m_processedFirstElement=true;
 557                   }
 558                 // Terminate list of attrs, and make sure they aren't
 559                 // considered children of the element
 560               } // if attrs exist
 561             if(!m_processedFirstElement)
 562             {
 563               // The DOM might not have an explicit declaration for the
 564               // implicit "xml:" prefix, but the XPath data model
 565               // requires that this appear as a Namespace Node so we
 566               // have to synthesize one. You can think of this as
 567               // being a default attribute defined by the XML
 568               // Namespaces spec rather than by the DTD.
 569               attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
 570                                                                                                                                         (Element)next,"xml",NAMESPACE_DECL_NS,
 571                                                                                                                                         makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
 572                                                                                                                                         ),
 573                                 nextindex,attrIndex,NULL);
 574               m_firstch.setElementAt(DTM.NULL,attrIndex);
 575               m_processedFirstElement=true;
 576             }
 577             if(attrIndex!=NULL)
 578               m_nextsib.setElementAt(DTM.NULL,attrIndex);
 579           } //if(ELEMENT_NODE)
 580       } // (if !suppressNode)
 581 
 582     // Text postprocessing: Act on values stored above
 583     if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
 584       {
 585         // %TBD% If nexttype was forced to TEXT, patch the DTM node
 586 
 587         next=lastTextNode;      // Advance the DOM cursor over contiguous text
 588       }
 589 
 590     // Remember where we left off.
 591     m_pos=next;
 592     return true;
 593   }
 594 
 595 
 596   /**
 597    * Return an DOM node for the given node.
 598    *
 599    * @param nodeHandle The node ID.
 600    *
 601    * @return A node representation of the DTM node.
 602    */
 603   public Node getNode(int nodeHandle)
 604   {
 605 
 606     int identity = makeNodeIdentity(nodeHandle);
 607 
 608     return m_nodes.get(identity);
 609   }
 610 
 611   /**
 612    * Get a Node from an identity index.
 613    *
 614    * NEEDSDOC @param nodeIdentity
 615    *
 616    * NEEDSDOC ($objectName$) @return
 617    */
 618   protected Node lookupNode(int nodeIdentity)
 619   {
 620     return m_nodes.get(nodeIdentity);
 621   }
 622 
 623   /**
 624    * Get the next node identity value in the list, and call the iterator
 625    * if it hasn't been added yet.
 626    *
 627    * @param identity The node identity (index).
 628    * @return identity+1, or DTM.NULL.
 629    */
 630   protected int getNextNodeIdentity(int identity)
 631   {
 632 
 633     identity += 1;
 634 
 635     if (identity >= m_nodes.size())
 636     {
 637       if (!nextNode())
 638         identity = DTM.NULL;
 639     }
 640 
 641     return identity;
 642   }
 643 
 644   /**
 645    * Get the handle from a Node.
 646    * <p>%OPT% This will be pretty slow.</p>
 647    *
 648    * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
 649    * walk down DTM reconstructing path) might be considerably faster
 650    * on later nodes in large documents. That might also imply improving
 651    * this call to handle nodes which would be in this DTM but
 652    * have not yet been built, which might or might not be a Good Thing.</p>
 653    *
 654    * %REVIEW% This relies on being able to test node-identity via
 655    * object-identity. DTM2DOM proxying is a great example of a case where
 656    * that doesn't work. DOM Level 3 will provide the isSameNode() method
 657    * to fix that, but until then this is going to be flaky.
 658    *
 659    * @param node A node, which may be null.
 660    *
 661    * @return The node handle or <code>DTM.NULL</code>.
 662    */
 663   private int getHandleFromNode(Node node)
 664   {
 665     if (null != node)
 666     {
 667       int len = m_nodes.size();
 668       boolean isMore;
 669       int i = 0;
 670       do
 671       {
 672         for (; i < len; i++)
 673         {
 674           if (m_nodes.get(i) == node)
 675             return makeNodeHandle(i);
 676         }
 677 
 678         isMore = nextNode();
 679 
 680         len = m_nodes.size();
 681 
 682       }
 683       while(isMore || i < len);
 684     }
 685 
 686     return DTM.NULL;
 687   }
 688 
 689   /** Get the handle from a Node. This is a more robust version of
 690    * getHandleFromNode, intended to be usable by the public.
 691    *
 692    * <p>%OPT% This will be pretty slow.</p>
 693    *
 694    * %REVIEW% This relies on being able to test node-identity via
 695    * object-identity. DTM2DOM proxying is a great example of a case where
 696    * that doesn't work. DOM Level 3 will provide the isSameNode() method
 697    * to fix that, but until then this is going to be flaky.
 698    *
 699    * @param node A node, which may be null.
 700    *
 701    * @return The node handle or <code>DTM.NULL</code>.  */
 702   public int getHandleOfNode(Node node)
 703   {
 704     if (null != node)
 705     {
 706       // Is Node actually within the same document? If not, don't search!
 707       // This would be easier if m_root was always the Document node, but
 708       // we decided to allow wrapping a DTM around a subtree.
 709       if((m_root==node) ||
 710          (m_root.getNodeType()==DOCUMENT_NODE &&
 711           m_root==node.getOwnerDocument()) ||
 712          (m_root.getNodeType()!=DOCUMENT_NODE &&
 713           m_root.getOwnerDocument()==node.getOwnerDocument())
 714          )
 715         {
 716           // If node _is_ in m_root's tree, find its handle
 717           //
 718           // %OPT% This check may be improved significantly when DOM
 719           // Level 3 nodeKey and relative-order tests become
 720           // available!
 721           for(Node cursor=node;
 722               cursor!=null;
 723               cursor=
 724                 (cursor.getNodeType()!=ATTRIBUTE_NODE)
 725                 ? cursor.getParentNode()
 726                 : ((org.w3c.dom.Attr)cursor).getOwnerElement())
 727             {
 728               if(cursor==m_root)
 729                 // We know this node; find its handle.
 730                 return getHandleFromNode(node);
 731             } // for ancestors of node
 732         } // if node and m_root in same Document
 733     } // if node!=null
 734 
 735     return DTM.NULL;
 736   }
 737 
 738   /**
 739    * Retrieves an attribute node by by qualified name and namespace URI.
 740    *
 741    * @param nodeHandle int Handle of the node upon which to look up this attribute..
 742    * @param namespaceURI The namespace URI of the attribute to
 743    *   retrieve, or null.
 744    * @param name The local name of the attribute to
 745    *   retrieve.
 746    * @return The attribute node handle with the specified name (
 747    *   <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
 748    *   attribute.
 749    */
 750   public int getAttributeNode(int nodeHandle, String namespaceURI,
 751                               String name)
 752   {
 753 
 754     // %OPT% This is probably slower than it needs to be.
 755     if (null == namespaceURI)
 756       namespaceURI = "";
 757 
 758     int type = getNodeType(nodeHandle);
 759 
 760     if (DTM.ELEMENT_NODE == type)
 761     {
 762 
 763       // Assume that attributes immediately follow the element.
 764       int identity = makeNodeIdentity(nodeHandle);
 765 
 766       while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
 767       {
 768         // Assume this can not be null.
 769         type = _type(identity);
 770 
 771                                 // %REVIEW%
 772                                 // Should namespace nodes be retrievable DOM-style as attrs?
 773                                 // If not we need a separate function... which may be desirable
 774                                 // architecturally, but which is ugly from a code point of view.
 775                                 // (If we REALLY insist on it, this code should become a subroutine
 776                                 // of both -- retrieve the node, then test if the type matches
 777                                 // what you're looking for.)
 778         if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
 779         {
 780           Node node = lookupNode(identity);
 781           String nodeuri = node.getNamespaceURI();
 782 
 783           if (null == nodeuri)
 784             nodeuri = "";
 785 
 786           String nodelocalname = node.getLocalName();
 787 
 788           if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
 789             return makeNodeHandle(identity);
 790         }
 791 
 792         else // if (DTM.NAMESPACE_NODE != type)
 793         {
 794           break;
 795         }
 796       }
 797     }
 798 
 799     return DTM.NULL;
 800   }
 801 
 802   /**
 803    * Get the string-value of a node as a String object
 804    * (see http://www.w3.org/TR/xpath#data-model
 805    * for the definition of a node's string-value).
 806    *
 807    * @param nodeHandle The node ID.
 808    *
 809    * @return A string object that represents the string-value of the given node.
 810    */
 811   public XMLString getStringValue(int nodeHandle)
 812   {
 813 
 814     int type = getNodeType(nodeHandle);
 815     Node node = getNode(nodeHandle);
 816     // %TBD% If an element only has one text node, we should just use it
 817     // directly.
 818     if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
 819     || DTM.DOCUMENT_FRAGMENT_NODE == type)
 820     {
 821       FastStringBuffer buf = StringBufferPool.get();
 822       String s;
 823 
 824       try
 825       {
 826         getNodeData(node, buf);
 827 
 828         s = (buf.length() > 0) ? buf.toString() : "";
 829       }
 830       finally
 831       {
 832         StringBufferPool.free(buf);
 833       }
 834 
 835       return m_xstrf.newstr( s );
 836     }
 837     else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
 838     {
 839       // If this is a DTM text node, it may be made of multiple DOM text
 840       // nodes -- including navigating into Entity References. DOM2DTM
 841       // records the first node in the sequence and requires that we
 842       // pick up the others when we retrieve the DTM node's value.
 843       //
 844       // %REVIEW% DOM Level 3 is expected to add a "whole text"
 845       // retrieval method which performs this function for us.
 846       FastStringBuffer buf = StringBufferPool.get();
 847       while(node!=null)
 848       {
 849         buf.append(node.getNodeValue());
 850         node=logicalNextDOMTextNode(node);
 851       }
 852       String s=(buf.length() > 0) ? buf.toString() : "";
 853       StringBufferPool.free(buf);
 854       return m_xstrf.newstr( s );
 855     }
 856     else
 857       return m_xstrf.newstr( node.getNodeValue() );
 858   }
 859 
 860   /**
 861    * Determine if the string-value of a node is whitespace
 862    *
 863    * @param nodeHandle The node Handle.
 864    *
 865    * @return Return true if the given node is whitespace.
 866    */
 867   public boolean isWhitespace(int nodeHandle)
 868   {
 869         int type = getNodeType(nodeHandle);
 870     Node node = getNode(nodeHandle);
 871         if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
 872     {
 873       // If this is a DTM text node, it may be made of multiple DOM text
 874       // nodes -- including navigating into Entity References. DOM2DTM
 875       // records the first node in the sequence and requires that we
 876       // pick up the others when we retrieve the DTM node's value.
 877       //
 878       // %REVIEW% DOM Level 3 is expected to add a "whole text"
 879       // retrieval method which performs this function for us.
 880       FastStringBuffer buf = StringBufferPool.get();
 881       while(node!=null)
 882       {
 883         buf.append(node.getNodeValue());
 884         node=logicalNextDOMTextNode(node);
 885       }
 886      boolean b = buf.isWhitespace(0, buf.length());
 887       StringBufferPool.free(buf);
 888      return b;
 889     }
 890     return false;
 891   }
 892 
 893   /**
 894    * Retrieve the text content of a DOM subtree, appending it into a
 895    * user-supplied FastStringBuffer object. Note that attributes are
 896    * not considered part of the content of an element.
 897    * <p>
 898    * There are open questions regarding whitespace stripping.
 899    * Currently we make no special effort in that regard, since the standard
 900    * DOM doesn't yet provide DTD-based information to distinguish
 901    * whitespace-in-element-context from genuine #PCDATA. Note that we
 902    * should probably also consider xml:space if/when we address this.
 903    * DOM Level 3 may solve the problem for us.
 904    * <p>
 905    * %REVIEW% Actually, since this method operates on the DOM side of the
 906    * fence rather than the DTM side, it SHOULDN'T do
 907    * any special handling. The DOM does what the DOM does; if you want
 908    * DTM-level abstractions, use DTM-level methods.
 909    *
 910    * @param node Node whose subtree is to be walked, gathering the
 911    * contents of all Text or CDATASection nodes.
 912    * @param buf FastStringBuffer into which the contents of the text
 913    * nodes are to be concatenated.
 914    */
 915   protected static void getNodeData(Node node, FastStringBuffer buf)
 916   {
 917 
 918     switch (node.getNodeType())
 919     {
 920     case Node.DOCUMENT_FRAGMENT_NODE :
 921     case Node.DOCUMENT_NODE :
 922     case Node.ELEMENT_NODE :
 923     {
 924       for (Node child = node.getFirstChild(); null != child;
 925               child = child.getNextSibling())
 926       {
 927         getNodeData(child, buf);
 928       }
 929     }
 930     break;
 931     case Node.TEXT_NODE :
 932     case Node.CDATA_SECTION_NODE :
 933     case Node.ATTRIBUTE_NODE :  // Never a child but might be our starting node
 934       buf.append(node.getNodeValue());
 935       break;
 936     case Node.PROCESSING_INSTRUCTION_NODE :
 937       // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
 938       break;
 939     default :
 940       // ignore
 941       break;
 942     }
 943   }
 944 
 945   /**
 946    * Given a node handle, return its DOM-style node name. This will
 947    * include names such as #text or #document.
 948    *
 949    * @param nodeHandle the id of the node.
 950    * @return String Name of this node, which may be an empty string.
 951    * %REVIEW% Document when empty string is possible...
 952    * %REVIEW-COMMENT% It should never be empty, should it?
 953    */
 954   public String getNodeName(int nodeHandle)
 955   {
 956 
 957     Node node = getNode(nodeHandle);
 958 
 959     // Assume non-null.
 960     return node.getNodeName();
 961   }
 962 
 963   /**
 964    * Given a node handle, return the XPath node name.  This should be
 965    * the name as described by the XPath data model, NOT the DOM-style
 966    * name.
 967    *
 968    * @param nodeHandle the id of the node.
 969    * @return String Name of this node, which may be an empty string.
 970    */
 971   public String getNodeNameX(int nodeHandle)
 972   {
 973 
 974     String name;
 975     short type = getNodeType(nodeHandle);
 976 
 977     switch (type)
 978     {
 979     case DTM.NAMESPACE_NODE :
 980     {
 981       Node node = getNode(nodeHandle);
 982 
 983       // assume not null.
 984       name = node.getNodeName();
 985       if(name.startsWith("xmlns:"))
 986       {
 987         name = QName.getLocalPart(name);
 988       }
 989       else if(name.equals("xmlns"))
 990       {
 991         name = "";
 992       }
 993     }
 994     break;
 995     case DTM.ATTRIBUTE_NODE :
 996     case DTM.ELEMENT_NODE :
 997     case DTM.ENTITY_REFERENCE_NODE :
 998     case DTM.PROCESSING_INSTRUCTION_NODE :
 999     {
1000       Node node = getNode(nodeHandle);
1001 
1002       // assume not null.
1003       name = node.getNodeName();
1004     }
1005     break;
1006     default :
1007       name = "";
1008     }
1009 
1010     return name;
1011   }
1012 
1013   /**
1014    * Given a node handle, return its XPath-style localname.
1015    * (As defined in Namespaces, this is the portion of the name after any
1016    * colon character).
1017    *
1018    * @param nodeHandle the id of the node.
1019    * @return String Local name of this node.
1020    */
1021   public String getLocalName(int nodeHandle)
1022   {
1023     if(JJK_NEWCODE)
1024     {
1025       int id=makeNodeIdentity(nodeHandle);
1026       if(NULL==id) return null;
1027       Node newnode=m_nodes.get(id);
1028       String newname=newnode.getLocalName();
1029       if (null == newname)
1030       {
1031         // XSLT treats PIs, and possibly other things, as having QNames.
1032         String qname = newnode.getNodeName();
1033         if('#'==qname.charAt(0))
1034         {
1035           //  Match old default for this function
1036           // This conversion may or may not be necessary
1037           newname="";
1038         }
1039         else
1040         {
1041           int index = qname.indexOf(':');
1042           newname = (index < 0) ? qname : qname.substring(index + 1);
1043         }
1044       }
1045       return newname;
1046     }
1047     else
1048     {
1049       String name;
1050       short type = getNodeType(nodeHandle);
1051       switch (type)
1052       {
1053       case DTM.ATTRIBUTE_NODE :
1054       case DTM.ELEMENT_NODE :
1055       case DTM.ENTITY_REFERENCE_NODE :
1056       case DTM.NAMESPACE_NODE :
1057       case DTM.PROCESSING_INSTRUCTION_NODE :
1058         {
1059           Node node = getNode(nodeHandle);
1060 
1061           // assume not null.
1062           name = node.getLocalName();
1063 
1064           if (null == name)
1065           {
1066             String qname = node.getNodeName();
1067             int index = qname.indexOf(':');
1068 
1069             name = (index < 0) ? qname : qname.substring(index + 1);
1070           }
1071         }
1072         break;
1073       default :
1074         name = "";
1075       }
1076       return name;
1077     }
1078   }
1079 
1080   /**
1081    * Given a namespace handle, return the prefix that the namespace decl is
1082    * mapping.
1083    * Given a node handle, return the prefix used to map to the namespace.
1084    *
1085    * <p> %REVIEW% Are you sure you want "" for no prefix?  </p>
1086    * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb  </p>
1087    *
1088    * @param nodeHandle the id of the node.
1089    * @return String prefix of this node's name, or "" if no explicit
1090    * namespace prefix was given.
1091    */
1092   public String getPrefix(int nodeHandle)
1093   {
1094 
1095     String prefix;
1096     short type = getNodeType(nodeHandle);
1097 
1098     switch (type)
1099     {
1100     case DTM.NAMESPACE_NODE :
1101     {
1102       Node node = getNode(nodeHandle);
1103 
1104       // assume not null.
1105       String qname = node.getNodeName();
1106       int index = qname.indexOf(':');
1107 
1108       prefix = (index < 0) ? "" : qname.substring(index + 1);
1109     }
1110     break;
1111     case DTM.ATTRIBUTE_NODE :
1112     case DTM.ELEMENT_NODE :
1113     {
1114       Node node = getNode(nodeHandle);
1115 
1116       // assume not null.
1117       String qname = node.getNodeName();
1118       int index = qname.indexOf(':');
1119 
1120       prefix = (index < 0) ? "" : qname.substring(0, index);
1121     }
1122     break;
1123     default :
1124       prefix = "";
1125     }
1126 
1127     return prefix;
1128   }
1129 
1130   /**
1131    * Given a node handle, return its DOM-style namespace URI
1132    * (As defined in Namespaces, this is the declared URI which this node's
1133    * prefix -- or default in lieu thereof -- was mapped to.)
1134    *
1135    * <p>%REVIEW% Null or ""? -sb</p>
1136    *
1137    * @param nodeHandle the id of the node.
1138    * @return String URI value of this node's namespace, or null if no
1139    * namespace was resolved.
1140    */
1141   public String getNamespaceURI(int nodeHandle)
1142   {
1143     if(JJK_NEWCODE)
1144     {
1145       int id=makeNodeIdentity(nodeHandle);
1146       if(id==NULL) return null;
1147       Node node=m_nodes.get(id);
1148       return node.getNamespaceURI();
1149     }
1150     else
1151     {
1152       String nsuri;
1153       short type = getNodeType(nodeHandle);
1154 
1155       switch (type)
1156       {
1157       case DTM.ATTRIBUTE_NODE :
1158       case DTM.ELEMENT_NODE :
1159       case DTM.ENTITY_REFERENCE_NODE :
1160       case DTM.NAMESPACE_NODE :
1161       case DTM.PROCESSING_INSTRUCTION_NODE :
1162         {
1163           Node node = getNode(nodeHandle);
1164 
1165           // assume not null.
1166           nsuri = node.getNamespaceURI();
1167 
1168           // %TBD% Handle DOM1?
1169         }
1170         break;
1171       default :
1172         nsuri = null;
1173       }
1174 
1175       return nsuri;
1176     }
1177 
1178   }
1179 
1180   /** Utility function: Given a DOM Text node, determine whether it is
1181    * logically followed by another Text or CDATASection node. This may
1182    * involve traversing into Entity References.
1183    *
1184    * %REVIEW% DOM Level 3 is expected to add functionality which may
1185    * allow us to retire this.
1186    */
1187   private Node logicalNextDOMTextNode(Node n)
1188   {
1189         Node p=n.getNextSibling();
1190         if(p==null)
1191         {
1192                 // Walk out of any EntityReferenceNodes that ended with text
1193                 for(n=n.getParentNode();
1194                         n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
1195                         n=n.getParentNode())
1196                 {
1197                         p=n.getNextSibling();
1198                         if(p!=null)
1199                                 break;
1200                 }
1201         }
1202         n=p;
1203         while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
1204         {
1205                 // Walk into any EntityReferenceNodes that start with text
1206                 if(n.hasChildNodes())
1207                         n=n.getFirstChild();
1208                 else
1209                         n=n.getNextSibling();
1210         }
1211         if(n!=null)
1212         {
1213                 // Found a logical next sibling. Is it text?
1214                 int ntype=n.getNodeType();
1215                 if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
1216                         n=null;
1217         }
1218         return n;
1219   }
1220 
1221   /**
1222    * Given a node handle, return its node value. This is mostly
1223    * as defined by the DOM, but may ignore some conveniences.
1224    * <p>
1225    *
1226    * @param nodeHandle The node id.
1227    * @return String Value of this node, or null if not
1228    * meaningful for this node type.
1229    */
1230   public String getNodeValue(int nodeHandle)
1231   {
1232     // The _type(nodeHandle) call was taking the lion's share of our
1233     // time, and was wrong anyway since it wasn't coverting handle to
1234     // identity. Inlined it.
1235     int type = _exptype(makeNodeIdentity(nodeHandle));
1236     type=(NULL != type) ? getNodeType(nodeHandle) : NULL;
1237 
1238     if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type)
1239       return getNode(nodeHandle).getNodeValue();
1240 
1241     // If this is a DTM text node, it may be made of multiple DOM text
1242     // nodes -- including navigating into Entity References. DOM2DTM
1243     // records the first node in the sequence and requires that we
1244     // pick up the others when we retrieve the DTM node's value.
1245     //
1246     // %REVIEW% DOM Level 3 is expected to add a "whole text"
1247     // retrieval method which performs this function for us.
1248     Node node = getNode(nodeHandle);
1249     Node n=logicalNextDOMTextNode(node);
1250     if(n==null)
1251       return node.getNodeValue();
1252 
1253     FastStringBuffer buf = StringBufferPool.get();
1254         buf.append(node.getNodeValue());
1255     while(n!=null)
1256     {
1257       buf.append(n.getNodeValue());
1258       n=logicalNextDOMTextNode(n);
1259     }
1260     String s = (buf.length() > 0) ? buf.toString() : "";
1261     StringBufferPool.free(buf);
1262     return s;
1263   }
1264 
1265   /**
1266    *   A document type declaration information item has the following properties:
1267    *
1268    *     1. [system identifier] The system identifier of the external subset, if
1269    *        it exists. Otherwise this property has no value.
1270    *
1271    * @return the system identifier String object, or null if there is none.
1272    */
1273   public String getDocumentTypeDeclarationSystemIdentifier()
1274   {
1275 
1276     Document doc;
1277 
1278     if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1279       doc = (Document) m_root;
1280     else
1281       doc = m_root.getOwnerDocument();
1282 
1283     if (null != doc)
1284     {
1285       DocumentType dtd = doc.getDoctype();
1286 
1287       if (null != dtd)
1288       {
1289         return dtd.getSystemId();
1290       }
1291     }
1292 
1293     return null;
1294   }
1295 
1296   /**
1297    * Return the public identifier of the external subset,
1298    * normalized as described in 4.2.2 External Entities [XML]. If there is
1299    * no external subset or if it has no public identifier, this property
1300    * has no value.
1301    *
1302    * @return the public identifier String object, or null if there is none.
1303    */
1304   public String getDocumentTypeDeclarationPublicIdentifier()
1305   {
1306 
1307     Document doc;
1308 
1309     if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1310       doc = (Document) m_root;
1311     else
1312       doc = m_root.getOwnerDocument();
1313 
1314     if (null != doc)
1315     {
1316       DocumentType dtd = doc.getDoctype();
1317 
1318       if (null != dtd)
1319       {
1320         return dtd.getPublicId();
1321       }
1322     }
1323 
1324     return null;
1325   }
1326 
1327   /**
1328    * Returns the <code>Element</code> whose <code>ID</code> is given by
1329    * <code>elementId</code>. If no such element exists, returns
1330    * <code>DTM.NULL</code>. Behavior is not defined if more than one element
1331    * has this <code>ID</code>. Attributes (including those
1332    * with the name "ID") are not of type ID unless so defined by DTD/Schema
1333    * information available to the DTM implementation.
1334    * Implementations that do not know whether attributes are of type ID or
1335    * not are expected to return <code>DTM.NULL</code>.
1336    *
1337    * <p>%REVIEW% Presumably IDs are still scoped to a single document,
1338    * and this operation searches only within a single document, right?
1339    * Wouldn't want collisions between DTMs in the same process.</p>
1340    *
1341    * @param elementId The unique <code>id</code> value for an element.
1342    * @return The handle of the matching element.
1343    */
1344   public int getElementById(String elementId)
1345   {
1346 
1347     Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1348         ? (Document) m_root : m_root.getOwnerDocument();
1349 
1350     if(null != doc)
1351     {
1352       Node elem = doc.getElementById(elementId);
1353       if(null != elem)
1354       {
1355         int elemHandle = getHandleFromNode(elem);
1356 
1357         if(DTM.NULL == elemHandle)
1358         {
1359           int identity = m_nodes.size()-1;
1360           while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
1361           {
1362             Node node = getNode(identity);
1363             if(node == elem)
1364             {
1365               elemHandle = getHandleFromNode(elem);
1366               break;
1367             }
1368            }
1369         }
1370 
1371         return elemHandle;
1372       }
1373 
1374     }
1375     return DTM.NULL;
1376   }
1377 
1378   /**
1379    * The getUnparsedEntityURI function returns the URI of the unparsed
1380    * entity with the specified name in the same document as the context
1381    * node (see [3.3 Unparsed Entities]). It returns the empty string if
1382    * there is no such entity.
1383    * <p>
1384    * XML processors may choose to use the System Identifier (if one
1385    * is provided) to resolve the entity, rather than the URI in the
1386    * Public Identifier. The details are dependent on the processor, and
1387    * we would have to support some form of plug-in resolver to handle
1388    * this properly. Currently, we simply return the System Identifier if
1389    * present, and hope that it a usable URI or that our caller can
1390    * map it to one.
1391    * TODO: Resolve Public Identifiers... or consider changing function name.
1392    * <p>
1393    * If we find a relative URI
1394    * reference, XML expects it to be resolved in terms of the base URI
1395    * of the document. The DOM doesn't do that for us, and it isn't
1396    * entirely clear whether that should be done here; currently that's
1397    * pushed up to a higher level of our application. (Note that DOM Level
1398    * 1 didn't store the document's base URI.)
1399    * TODO: Consider resolving Relative URIs.
1400    * <p>
1401    * (The DOM's statement that "An XML processor may choose to
1402    * completely expand entities before the structure model is passed
1403    * to the DOM" refers only to parsed entities, not unparsed, and hence
1404    * doesn't affect this function.)
1405    *
1406    * @param name A string containing the Entity Name of the unparsed
1407    * entity.
1408    *
1409    * @return String containing the URI of the Unparsed Entity, or an
1410    * empty string if no such entity exists.
1411    */
1412   public String getUnparsedEntityURI(String name)
1413   {
1414 
1415     String url = "";
1416     Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1417         ? (Document) m_root : m_root.getOwnerDocument();
1418 
1419     if (null != doc)
1420     {
1421       DocumentType doctype = doc.getDoctype();
1422 
1423       if (null != doctype)
1424       {
1425         NamedNodeMap entities = doctype.getEntities();
1426         if(null == entities)
1427           return url;
1428         Entity entity = (Entity) entities.getNamedItem(name);
1429         if(null == entity)
1430           return url;
1431 
1432         String notationName = entity.getNotationName();
1433 
1434         if (null != notationName)  // then it's unparsed
1435         {
1436           // The draft says: "The XSLT processor may use the public
1437           // identifier to generate a URI for the entity instead of the URI
1438           // specified in the system identifier. If the XSLT processor does
1439           // not use the public identifier to generate the URI, it must use
1440           // the system identifier; if the system identifier is a relative
1441           // URI, it must be resolved into an absolute URI using the URI of
1442           // the resource containing the entity declaration as the base
1443           // URI [RFC2396]."
1444           // So I'm falling a bit short here.
1445           url = entity.getSystemId();
1446 
1447           if (null == url)
1448           {
1449             url = entity.getPublicId();
1450           }
1451           else
1452           {
1453             // This should be resolved to an absolute URL, but that's hard
1454             // to do from here.
1455           }
1456         }
1457       }
1458     }
1459 
1460     return url;
1461   }
1462 
1463   /**
1464    *     5. [specified] A flag indicating whether this attribute was actually
1465    *        specified in the start-tag of its element, or was defaulted from the
1466    *        DTD.
1467    *
1468    * @param attributeHandle the attribute handle
1469    * @return <code>true</code> if the attribute was specified;
1470    *         <code>false</code> if it was defaulted.
1471    */
1472   public boolean isAttributeSpecified(int attributeHandle)
1473   {
1474     int type = getNodeType(attributeHandle);
1475 
1476     if (DTM.ATTRIBUTE_NODE == type)
1477     {
1478       Attr attr = (Attr)getNode(attributeHandle);
1479       return attr.getSpecified();
1480     }
1481     return false;
1482   }
1483 
  /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
   * we're wrapped around an existing DOM; this implementation does nothing
   * and ignores its argument.
   *
   * @param source The IncrementalSAXSource that we want to receive events from
   * on demand.
   */
  public void setIncrementalSAXSource(IncrementalSAXSource source)
  {
    // Intentionally empty.
  }
1493 
  /** getContentHandler returns "our SAX builder" -- the thing that
   * someone else should send SAX events to in order to extend this
   * DTM model.
   *
   * <p>This implementation always returns null.</p>
   *
   * @return null if this model doesn't respond to SAX events,
   * "this" if the DTM object has a built-in SAX ContentHandler,
   * the IncrementalSAXSource if we're bound to one and should receive
   * the SAX stream via it for incremental build purposes...
   * */
  public org.xml.sax.ContentHandler getContentHandler()
  {
      return null;
  }
1507 
  /**
   * Return this DTM's lexical handler.
   *
   * <p>This implementation always returns null.</p>
   *
   * %REVIEW% Should this return null if construction already done/begun?
   *
   * @return null if this model doesn't respond to lexical SAX events,
   * "this" if the DTM object has a built-in SAX ContentHandler,
   * the IncrementalSAXSource if we're bound to one and should receive
   * the SAX stream via it for incremental build purposes...
   */
  public org.xml.sax.ext.LexicalHandler getLexicalHandler()
  {

    return null;
  }
1523 
1524 
  /**
   * Return this DTM's EntityResolver.
   *
   * <p>This implementation always returns null.</p>
   *
   * @return null if this model doesn't respond to SAX entity ref events.
   */
  public org.xml.sax.EntityResolver getEntityResolver()
  {

    return null;
  }
1535 
  /**
   * Return this DTM's DTDHandler.
   *
   * <p>This implementation always returns null.</p>
   *
   * @return null if this model doesn't respond to SAX dtd events.
   */
  public org.xml.sax.DTDHandler getDTDHandler()
  {

    return null;
  }
1546 
  /**
   * Return this DTM's ErrorHandler.
   *
   * <p>This implementation always returns null.</p>
   *
   * @return null if this model doesn't respond to SAX error events.
   */
  public org.xml.sax.ErrorHandler getErrorHandler()
  {

    return null;
  }
1557 
  /**
   * Return this DTM's DeclHandler.
   *
   * <p>This implementation always returns null.</p>
   *
   * @return null if this model doesn't respond to SAX Decl events.
   */
  public org.xml.sax.ext.DeclHandler getDeclHandler()
  {

    return null;
  }
1568 
  /** Guidance to the DTMManager on whether the transformation and the parse
   * must run simultaneously. This implementation always answers false.
   *
   * @return true iff we're building this model incrementally (eg
   * we're partnered with a IncrementalSAXSource) and thus require that the
   * transformation and the parse run simultaneously.
   * */
  public boolean needsTwoThreads()
  {
    return false;
  }
1578 
1579   // ========== Direct SAX Dispatch, for optimization purposes ========
1580 
1581   /**
1582    * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
1583    * of whitespace.  Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
1584    * the definition of <CODE>S</CODE></A> for details.
1585    * @param   ch      Character to check as XML whitespace.
1586    * @return          =true if <var>ch</var> is XML whitespace; otherwise =false.
1587    */
1588   private static boolean isSpace(char ch)
1589   {
1590     return XMLCharacterRecognizer.isWhiteSpace(ch);  // Take the easy way out for now.
1591   }
1592 
1593   /**
1594    * Directly call the
1595    * characters method on the passed ContentHandler for the
1596    * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
1597    * for the definition of a node's string-value). Multiple calls to the
1598    * ContentHandler's characters methods may well occur for a single call to
1599    * this method.
1600    *
1601    * @param nodeHandle The node ID.
1602    * @param ch A non-null reference to a ContentHandler.
1603    *
1604    * @throws org.xml.sax.SAXException
1605    */
1606   public void dispatchCharactersEvents(
1607           int nodeHandle, org.xml.sax.ContentHandler ch,
1608           boolean normalize)
1609             throws org.xml.sax.SAXException
1610   {
1611     if(normalize)
1612     {
1613       XMLString str = getStringValue(nodeHandle);
1614       str = str.fixWhiteSpace(true, true, false);
1615       str.dispatchCharactersEvents(ch);
1616     }
1617     else
1618     {
1619       int type = getNodeType(nodeHandle);
1620       Node node = getNode(nodeHandle);
1621       dispatchNodeData(node, ch, 0);
1622           // Text coalition -- a DTM text node may represent multiple
1623           // DOM nodes.
1624           if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
1625           {
1626                   while( null != (node=logicalNextDOMTextNode(node)) )
1627                   {
1628                       dispatchNodeData(node, ch, 0);
1629                   }
1630           }
1631     }
1632   }
1633 
1634   /**
1635    * Dispatch the character content of a DOM subtree to a SAX
1636    * ContentHandler. Only child nodes are walked, so the attributes of
1637    * an element are not considered part of its content.
1638    * <p>
1639    * Whitespace stripping is an open question. No special effort is made
1640    * here, since the standard DOM doesn't yet provide DTD-based information
1641    * to distinguish whitespace-in-element-context from genuine #PCDATA. We
1642    * should probably also consider xml:space if/when this is addressed;
1643    * DOM Level 3 may solve the problem for us.
1644    * <p>
1645    * %REVIEW% As a DOM-level operation, this routine arguably _shouldn't_ do any
1646    * processing beyond what the DOM already does; whitespace stripping and so on
1647    * belong at the DTM level. For a stripped DOM view, wrap DTM2DOM around DOM2DTM.
1648    * @param node Node to walk; values of Text, CDATASection and Attr nodes
1649    * are dispatched, Comment and PI values only when depth is 0.
1650    * @param ch Receives the data; a CharacterNodeHandler is given the Node itself.
1651    * @param depth 0 for the starting node, one more per level of recursion.
1652    * @throws org.xml.sax.SAXException propagated from the handler's characters call.
1653    */
1654   @SuppressWarnings("fallthrough")
1655   protected static void dispatchNodeData(Node node,
1656                                          org.xml.sax.ContentHandler ch,
1657                                          int depth)
1658             throws org.xml.sax.SAXException
1659   {
1660 
1661     switch (node.getNodeType())
1662     {
1663     case Node.DOCUMENT_FRAGMENT_NODE : // container nodes: recurse into each child, one level deeper
1664     case Node.DOCUMENT_NODE :
1665     case Node.ELEMENT_NODE :
1666     {
1667       for (Node child = node.getFirstChild(); null != child;
1668               child = child.getNextSibling())
1669       {
1670         dispatchNodeData(child, ch, depth+1);
1671       }
1672     }
1673     break;
1674     case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW% PIs are handled exactly like comments
1675     case Node.COMMENT_NODE :
1676       if(0 != depth)
1677         break; // comment/PI nested below the start node: nothing is dispatched
1678         // Depth 0 (the comment/PI is the start node): fall through and dispatch its value.
1679         // NOTE: working in DOM space, this does _not_ attempt Text Coalition; that belongs in DTM space.
1680     case Node.TEXT_NODE :
1681     case Node.CDATA_SECTION_NODE :
1682     case Node.ATTRIBUTE_NODE :
1683       String str = node.getNodeValue(); // only used by the plain ContentHandler branch below
1684       if(ch instanceof CharacterNodeHandler)
1685       {
1686         ((CharacterNodeHandler)ch).characters(node); // this handler takes the Node itself
1687       }
1688       else
1689       {
1690         ch.characters(str.toCharArray(), 0, str.length()); // standard SAX path: the value as a char array
1691       }
1692       break;
1693 //    /* case Node.PROCESSING_INSTRUCTION_NODE :
1694 //      // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1695 //      break; */
1696     default :
1697       // ignore: every other node type is skipped without dispatching anything
1698       break;
1699     }
1700   }
1701 
1702   TreeWalker m_walker = new TreeWalker(null); // shared walker reused by dispatchToEvents, which attaches a ContentHandler only for the duration of a traversal
1703 
1704   /**
1705    * Directly create SAX parser events from a subtree, by running a
1706    * TreeWalker over the DOM node that the handle resolves to.
1707    * @param nodeHandle The node ID.
1708    * @param ch A non-null reference to a ContentHandler.
1709    *
1710    * @throws org.xml.sax.SAXException propagated from the traversal.
1711    */
1712   public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
1713           throws org.xml.sax.SAXException
1714   {
1715     TreeWalker treeWalker = m_walker; // start with the shared walker
1716     ContentHandler prevCH = treeWalker.getContentHandler();
1717 
1718     if(null != prevCH) // shared walker already has a handler -- presumably a traversal in progress (re-entrant call); TODO confirm
1719     {
1720       treeWalker = new TreeWalker(null); // so use a throwaway walker for this call instead
1721     }
1722     treeWalker.setContentHandler(ch);
1723 
1724     try
1725     {
1726       Node node = getNode(nodeHandle);
1727       treeWalker.traverseFragment(node);
1728     }
1729     finally
1730     {
1731       treeWalker.setContentHandler(null); // always detach, even on failure, so the null check above sees the walker as free
1732     }
1733   }
1734 
1735   public interface CharacterNodeHandler // opt-in extension for a ContentHandler passed to dispatchNodeData
1736   {
1737     public void characters(Node node) // called with the DOM node itself, in place of characters(char[], int, int)
1738             throws org.xml.sax.SAXException;
1739   }
1740 
1741   /**
1742    * For the moment all the run time properties are ignored by this
1743    * class: the call does nothing and never fails.
1744    *
1745    * @param property name of the property, as a <code>String</code> (ignored)
1746    * @param value value for the property, as an <code>Object</code> (ignored)
1747    */
1748   public void setProperty(String property, Object value)
1749   { // intentionally empty
1750   }
1751 
1752   /**
1753    * No source information is available for DOM2DTM, so return
1754    * <code>null</code> here.
1755    *
1756    * @param node an <code>int</code> value (presumably a node handle); not used
1757    * @return always <code>null</code>
1758    */
1759   public SourceLocator getSourceLocatorFor(int node)
1760   {
1761     return null;
1762   }
1763 
1764 }