1 /* 2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 3 * @LastModified: Oct 2017 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm; 23 24 import com.sun.org.apache.xml.internal.dtm.DTM; 25 import com.sun.org.apache.xml.internal.dtm.DTMManager; 26 import com.sun.org.apache.xml.internal.dtm.DTMWSFilter; 27 import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators; 28 import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault; 29 import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable; 30 import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource; 31 import com.sun.org.apache.xml.internal.res.XMLErrorResources; 32 import com.sun.org.apache.xml.internal.res.XMLMessages; 33 import com.sun.org.apache.xml.internal.utils.FastStringBuffer; 34 import com.sun.org.apache.xml.internal.utils.QName; 35 import com.sun.org.apache.xml.internal.utils.StringBufferPool; 36 import com.sun.org.apache.xml.internal.utils.TreeWalker; 37 import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer; 38 import com.sun.org.apache.xml.internal.utils.XMLString; 39 import 
com.sun.org.apache.xml.internal.utils.XMLStringFactory; 40 import java.util.ArrayList; 41 import java.util.List; 42 import javax.xml.transform.SourceLocator; 43 import javax.xml.transform.dom.DOMSource; 44 import org.w3c.dom.Attr; 45 import org.w3c.dom.Document; 46 import org.w3c.dom.DocumentType; 47 import org.w3c.dom.Element; 48 import org.w3c.dom.Entity; 49 import org.w3c.dom.NamedNodeMap; 50 import org.w3c.dom.Node; 51 import org.xml.sax.ContentHandler; 52 53 /** The <code>DOM2DTM</code> class serves up a DOM's contents via the 54 * DTM API. 55 * 56 * Note that it doesn't necessarily represent a full Document 57 * tree. You can wrap a DOM2DTM around a specific node and its subtree 58 * and the right things should happen. (I don't _think_ we currently 59 * support DocumentFrgment nodes as roots, though that might be worth 60 * considering.) 61 * 62 * Note too that we do not currently attempt to track document 63 * mutation. If you alter the DOM after wrapping DOM2DTM around it, 64 * all bets are off. 65 * */ 66 public class DOM2DTM extends DTMDefaultBaseIterators 67 { 68 static final boolean JJK_DEBUG=false; 69 static final boolean JJK_NEWCODE=true; 70 71 /** Manefest constant 72 */ 73 static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace"; 74 75 /** The current position in the DOM tree. Last node examined for 76 * possible copying to DTM. */ 77 transient private Node m_pos; 78 /** The current position in the DTM tree. Who children get appended to. */ 79 private int m_last_parent=0; 80 /** The current position in the DTM tree. Who children reference as their 81 * previous sib. */ 82 private int m_last_kid=NULL; 83 84 /** The top of the subtree. 85 * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.' 86 * */ 87 transient private Node m_root; 88 89 /** True iff the first element has been processed. This is used to control 90 synthesis of the implied xml: namespace declaration node. 
*/ 91 boolean m_processedFirstElement=false; 92 93 /** true if ALL the nodes in the m_root subtree have been processed; 94 * false if our incremental build has not yet finished scanning the 95 * DOM tree. */ 96 transient private boolean m_nodesAreProcessed; 97 98 /** The node objects. The instance part of the handle indexes 99 * directly into this vector. Each DTM node may actually be 100 * composed of several DOM nodes (for example, if logically-adjacent 101 * Text/CDATASection nodes in the DOM have been coalesced into a 102 * single DTM Text node); this table points only to the first in 103 * that sequence. */ 104 protected List<Node> m_nodes = new ArrayList<>(); 105 106 /** 107 * Construct a DOM2DTM object from a DOM node. 108 * 109 * @param mgr The DTMManager who owns this DTM. 110 * @param domSource the DOM source that this DTM will wrap. 111 * @param dtmIdentity The DTM identity ID for this DTM. 112 * @param whiteSpaceFilter The white space filter for this DTM, which may 113 * be null. 114 * @param xstringfactory XMLString factory for creating character content. 115 * @param doIndexing true if the caller considers it worth it to use 116 * indexing schemes. 117 */ 118 public DOM2DTM(DTMManager mgr, DOMSource domSource, 119 int dtmIdentity, DTMWSFilter whiteSpaceFilter, 120 XMLStringFactory xstringfactory, 121 boolean doIndexing) 122 { 123 super(mgr, domSource, dtmIdentity, whiteSpaceFilter, 124 xstringfactory, doIndexing); 125 126 // Initialize DOM navigation 127 m_pos=m_root = domSource.getNode(); 128 // Initialize DTM navigation 129 m_last_parent=m_last_kid=NULL; 130 m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL); 131 132 // Apparently the domSource root may not actually be the 133 // Document node. If it's an Element node, we need to immediately 134 // add its attributes. Adapted from nextNode(). 135 // %REVIEW% Move this logic into addNode and recurse? Cleaner! 136 // 137 // (If it's an EntityReference node, we're probably scrod. 
For now 138 // I'm just hoping nobody is ever quite that foolish... %REVIEW%) 139 // 140 // %ISSUE% What about inherited namespaces in this case? 141 // Do we need to special-case initialize them into the DTM model? 142 if(ELEMENT_NODE == m_root.getNodeType()) 143 { 144 NamedNodeMap attrs=m_root.getAttributes(); 145 int attrsize=(attrs==null) ? 0 : attrs.getLength(); 146 if(attrsize>0) 147 { 148 int attrIndex=NULL; // start with no previous sib 149 for(int i=0;i<attrsize;++i) 150 { 151 // No need to force nodetype in this case; 152 // addNode() will take care of switching it from 153 // Attr to Namespace if necessary. 154 attrIndex=addNode(attrs.item(i),0,attrIndex,NULL); 155 m_firstch.setElementAt(DTM.NULL,attrIndex); 156 } 157 // Terminate list of attrs, and make sure they aren't 158 // considered children of the element 159 m_nextsib.setElementAt(DTM.NULL,attrIndex); 160 161 // IMPORTANT: This does NOT change m_last_parent or m_last_kid! 162 } // if attrs exist 163 } //if(ELEMENT_NODE) 164 165 // Initialize DTM-completed status 166 m_nodesAreProcessed = false; 167 } 168 169 /** 170 * Construct the node map from the node. 171 * 172 * @param node The node that is to be added to the DTM. 173 * @param parentIndex The current parent index. 174 * @param previousSibling The previous sibling index. 175 * @param forceNodeType If not DTM.NULL, overrides the DOM node type. 176 * Used to force nodes to Text rather than CDATASection when their 177 * coalesced value includes ordinary Text nodes (current DTM behavior). 178 * 179 * @return The index identity of the node that was added. 180 */ 181 protected int addNode(Node node, int parentIndex, 182 int previousSibling, int forceNodeType) 183 { 184 int nodeIndex = m_nodes.size(); 185 186 // Have we overflowed a DTM Identity's addressing range? 
187 if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS)) 188 { 189 try 190 { 191 if(m_mgr==null) 192 throw new ClassCastException(); 193 194 // Handle as Extended Addressing 195 DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr; 196 int id=mgrD.getFirstFreeDTMID(); 197 mgrD.addDTM(this,id,nodeIndex); 198 m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS); 199 } 200 catch(ClassCastException e) 201 { 202 // %REVIEW% Wrong error message, but I've been told we're trying 203 // not to add messages right not for I18N reasons. 204 // %REVIEW% Should this be a Fatal Error? 205 error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available"; 206 } 207 } 208 209 m_size++; 210 // ensureSize(nodeIndex); 211 212 int type; 213 if(NULL==forceNodeType) 214 type = node.getNodeType(); 215 else 216 type=forceNodeType; 217 218 // %REVIEW% The Namespace Spec currently says that Namespaces are 219 // processed in a non-namespace-aware manner, by matching the 220 // QName, even though there is in fact a namespace assigned to 221 // these nodes in the DOM. If and when that changes, we will have 222 // to consider whether we check the namespace-for-namespaces 223 // rather than the node name. 224 // 225 // %TBD% Note that the DOM does not necessarily explicitly declare 226 // all the namespaces it uses. DOM Level 3 will introduce a 227 // namespace-normalization operation which reconciles that, and we 228 // can request that users invoke it or otherwise ensure that the 229 // tree is namespace-well-formed before passing the DOM to Xalan. 230 // But if they don't, what should we do about it? We probably 231 // don't want to alter the source DOM (and may not be able to do 232 // so if it's read-only). The best available answer might be to 233 // synthesize additional DTM Namespace Nodes that don't correspond 234 // to DOM Attr Nodes. 
235 if (Node.ATTRIBUTE_NODE == type) 236 { 237 String name = node.getNodeName(); 238 239 if (name.startsWith("xmlns:") || name.equals("xmlns")) 240 { 241 type = DTM.NAMESPACE_NODE; 242 } 243 } 244 245 m_nodes.add(node); 246 247 m_firstch.setElementAt(NOTPROCESSED,nodeIndex); 248 m_nextsib.setElementAt(NOTPROCESSED,nodeIndex); 249 m_prevsib.setElementAt(previousSibling,nodeIndex); 250 m_parent.setElementAt(parentIndex,nodeIndex); 251 252 if(DTM.NULL != parentIndex && 253 type != DTM.ATTRIBUTE_NODE && 254 type != DTM.NAMESPACE_NODE) 255 { 256 // If the DTM parent had no children, this becomes its first child. 257 if(NOTPROCESSED == m_firstch.elementAt(parentIndex)) 258 m_firstch.setElementAt(nodeIndex,parentIndex); 259 } 260 261 String nsURI = node.getNamespaceURI(); 262 263 // Deal with the difference between Namespace spec and XSLT 264 // definitions of local name. (The former says PIs don't have 265 // localnames; the latter says they do.) 266 String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ? 267 node.getNodeName() : 268 node.getLocalName(); 269 270 // Hack to make DOM1 sort of work... 271 if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE)) 272 && null == localName) 273 localName = node.getNodeName(); // -sb 274 275 ExpandedNameTable exnt = m_expandedNameTable; 276 277 // %TBD% Nodes created with the old non-namespace-aware DOM 278 // calls createElement() and createAttribute() will never have a 279 // localname. That will cause their expandedNameID to be just the 280 // nodeType... which will keep them from being matched 281 // successfully by name. Since the DOM makes no promise that 282 // those will participate in namespace processing, this is 283 // officially accepted as Not Our Fault. But it might be nice to 284 // issue a diagnostic message! 
285 if(node.getLocalName()==null && 286 (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE)) 287 { 288 // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM."); 289 } 290 291 int expandedNameID = (null != localName) 292 ? exnt.getExpandedTypeID(nsURI, localName, type) : 293 exnt.getExpandedTypeID(type); 294 295 m_exptype.setElementAt(expandedNameID,nodeIndex); 296 297 indexNode(expandedNameID, nodeIndex); 298 299 if (DTM.NULL != previousSibling) 300 m_nextsib.setElementAt(nodeIndex,previousSibling); 301 302 // This should be done after m_exptype has been set, and probably should 303 // always be the last thing we do 304 if (type == DTM.NAMESPACE_NODE) 305 declareNamespaceInContext(parentIndex,nodeIndex); 306 307 return nodeIndex; 308 } 309 310 /** 311 * Get the number of nodes that have been added. 312 */ 313 public int getNumberOfNodes() 314 { 315 return m_nodes.size(); 316 } 317 318 /** 319 * This method iterates to the next node that will be added to the table. 320 * Each call to this method adds a new node to the table, unless the end 321 * is reached, in which case it returns null. 322 * 323 * @return The true if a next node is found or false if 324 * there are no more nodes. 325 */ 326 protected boolean nextNode() 327 { 328 // Non-recursive one-fetch-at-a-time depth-first traversal with 329 // attribute/namespace nodes and white-space stripping. 330 // Navigating the DOM is simple, navigating the DTM is simple; 331 // keeping track of both at once is a trifle baroque but at least 332 // we've avoided most of the special cases. 333 if (m_nodesAreProcessed) 334 return false; 335 336 // %REVIEW% Is this local copy Really Useful from a performance 337 // point of view? Or is this a false microoptimization? 338 Node pos=m_pos; 339 Node next=null; 340 int nexttype=NULL; 341 342 // Navigate DOM tree 343 do 344 { 345 // Look down to first child. 
346 if (pos.hasChildNodes()) 347 { 348 next = pos.getFirstChild(); 349 350 // %REVIEW% There's probably a more elegant way to skip 351 // the doctype. (Just let it go and Suppress it? 352 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType()) 353 next=next.getNextSibling(); 354 355 // Push DTM context -- except for children of Entity References, 356 // which have no DTM equivalent and cause no DTM navigation. 357 if(ENTITY_REFERENCE_NODE!=pos.getNodeType()) 358 { 359 m_last_parent=m_last_kid; 360 m_last_kid=NULL; 361 // Whitespace-handler context stacking 362 if(null != m_wsfilter) 363 { 364 short wsv = 365 m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this); 366 boolean shouldStrip = (DTMWSFilter.INHERIT == wsv) 367 ? getShouldStripWhitespace() 368 : (DTMWSFilter.STRIP == wsv); 369 pushShouldStripWhitespace(shouldStrip); 370 } // if(m_wsfilter) 371 } 372 } 373 374 // If that fails, look up and right (but not past root!) 375 else 376 { 377 if(m_last_kid!=NULL) 378 { 379 // Last node posted at this level had no more children 380 // If it has _no_ children, we need to record that. 381 if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED) 382 m_firstch.setElementAt(NULL,m_last_kid); 383 } 384 385 while(m_last_parent != NULL) 386 { 387 // %REVIEW% There's probably a more elegant way to 388 // skip the doctype. (Just let it go and Suppress it? 389 next = pos.getNextSibling(); 390 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType()) 391 next=next.getNextSibling(); 392 393 if(next!=null) 394 break; // Found it! 395 396 // No next-sibling found. Pop the DOM. 397 pos=pos.getParentNode(); 398 if(pos==null) 399 { 400 // %TBD% Should never arise, but I want to be sure of that... 401 if(JJK_DEBUG) 402 { 403 System.out.println("***** DOM2DTM Pop Control Flow problem"); 404 for(;;); // Freeze right here! 405 } 406 } 407 408 // The only parents in the DTM are Elements. However, 409 // the DOM could contain EntityReferences. 
If we 410 // encounter one, pop it _without_ popping DTM. 411 if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType()) 412 { 413 // Nothing needs doing 414 if(JJK_DEBUG) 415 System.out.println("***** DOM2DTM popping EntRef"); 416 } 417 else 418 { 419 popShouldStripWhitespace(); 420 // Fix and pop DTM 421 if(m_last_kid==NULL) 422 m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element 423 else 424 m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else 425 m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent); 426 } 427 } 428 if(m_last_parent==NULL) 429 next=null; 430 } 431 432 if(next!=null) 433 nexttype=next.getNodeType(); 434 435 // If it's an entity ref, advance past it. 436 // 437 // %REVIEW% Should we let this out the door and just suppress it? 438 // More work, but simpler code, more likely to be correct, and 439 // it doesn't happen very often. We'd get rid of the loop too. 440 if (ENTITY_REFERENCE_NODE == nexttype) 441 pos=next; 442 } 443 while (ENTITY_REFERENCE_NODE == nexttype); 444 445 // Did we run out of the tree? 446 if(next==null) 447 { 448 m_nextsib.setElementAt(NULL,0); 449 m_nodesAreProcessed = true; 450 m_pos=null; 451 452 if(JJK_DEBUG) 453 { 454 System.out.println("***** DOM2DTM Crosscheck:"); 455 for(int i=0;i<m_nodes.size();++i) 456 System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i)); 457 } 458 459 return false; 460 } 461 462 // Text needs some special handling: 463 // 464 // DTM may skip whitespace. This is handled by the suppressNode flag, which 465 // when true will keep the DTM node from being created. 466 // 467 // DTM only directly records the first DOM node of any logically-contiguous 468 // sequence. The lastTextNode value will be set to the last node in the 469 // contiguous sequence, and -- AFTER the DTM addNode -- can be used to 470 // advance next over this whole block. 
Should be simpler than special-casing 471 // the above loop for "Was the logically-preceeding sibling a text node". 472 // 473 // Finally, a DTM node should be considered a CDATASection only if all the 474 // contiguous text it covers is CDATASections. The first Text should 475 // force DTM to Text. 476 477 boolean suppressNode=false; 478 Node lastTextNode=null; 479 480 nexttype=next.getNodeType(); 481 482 // nexttype=pos.getNodeType(); 483 if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) 484 { 485 // If filtering, initially assume we're going to suppress the node 486 suppressNode=((null != m_wsfilter) && getShouldStripWhitespace()); 487 488 // Scan logically contiguous text (siblings, plus "flattening" 489 // of entity reference boundaries). 490 Node n=next; 491 while(n!=null) 492 { 493 lastTextNode=n; 494 // Any Text node means DTM considers it all Text 495 if(TEXT_NODE == n.getNodeType()) 496 nexttype=TEXT_NODE; 497 // Any non-whitespace in this sequence blocks whitespace 498 // suppression 499 suppressNode &= 500 XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue()); 501 502 n=logicalNextDOMTextNode(n); 503 } 504 } 505 506 // Special handling for PIs: Some DOMs represent the XML 507 // Declaration as a PI. This is officially incorrect, per the DOM 508 // spec, but is considered a "wrong but tolerable" temporary 509 // workaround pending proper handling of these fields in DOM Level 510 // 3. We want to recognize and reject that case. 511 else if(PROCESSING_INSTRUCTION_NODE==nexttype) 512 { 513 suppressNode = (pos.getNodeName().toLowerCase().equals("xml")); 514 } 515 516 517 if(!suppressNode) 518 { 519 // Inserting next. NOTE that we force the node type; for 520 // coalesced Text, this records CDATASections adjacent to 521 // ordinary Text as Text. 
522 int nextindex=addNode(next,m_last_parent,m_last_kid, 523 nexttype); 524 525 m_last_kid=nextindex; 526 527 if(ELEMENT_NODE == nexttype) 528 { 529 int attrIndex=NULL; // start with no previous sib 530 // Process attributes _now_, rather than waiting. 531 // Simpler control flow, makes NS cache available immediately. 532 NamedNodeMap attrs=next.getAttributes(); 533 int attrsize=(attrs==null) ? 0 : attrs.getLength(); 534 if(attrsize>0) 535 { 536 for(int i=0;i<attrsize;++i) 537 { 538 // No need to force nodetype in this case; 539 // addNode() will take care of switching it from 540 // Attr to Namespace if necessary. 541 attrIndex=addNode(attrs.item(i), 542 nextindex,attrIndex,NULL); 543 m_firstch.setElementAt(DTM.NULL,attrIndex); 544 545 // If the xml: prefix is explicitly declared 546 // we don't need to synthesize one. 547 // 548 // NOTE that XML Namespaces were not originally 549 // defined as being namespace-aware (grrr), and 550 // while the W3C is planning to fix this it's 551 // safer for now to test the QName and trust the 552 // parsers to prevent anyone from redefining the 553 // reserved xmlns: prefix 554 if(!m_processedFirstElement 555 && "xmlns:xml".equals(attrs.item(i).getNodeName())) 556 m_processedFirstElement=true; 557 } 558 // Terminate list of attrs, and make sure they aren't 559 // considered children of the element 560 } // if attrs exist 561 if(!m_processedFirstElement) 562 { 563 // The DOM might not have an explicit declaration for the 564 // implicit "xml:" prefix, but the XPath data model 565 // requires that this appear as a Namespace Node so we 566 // have to synthesize one. You can think of this as 567 // being a default attribute defined by the XML 568 // Namespaces spec rather than by the DTD. 
569 attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode( 570 (Element)next,"xml",NAMESPACE_DECL_NS, 571 makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1) 572 ), 573 nextindex,attrIndex,NULL); 574 m_firstch.setElementAt(DTM.NULL,attrIndex); 575 m_processedFirstElement=true; 576 } 577 if(attrIndex!=NULL) 578 m_nextsib.setElementAt(DTM.NULL,attrIndex); 579 } //if(ELEMENT_NODE) 580 } // (if !suppressNode) 581 582 // Text postprocessing: Act on values stored above 583 if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) 584 { 585 // %TBD% If nexttype was forced to TEXT, patch the DTM node 586 587 next=lastTextNode; // Advance the DOM cursor over contiguous text 588 } 589 590 // Remember where we left off. 591 m_pos=next; 592 return true; 593 } 594 595 596 /** 597 * Return an DOM node for the given node. 598 * 599 * @param nodeHandle The node ID. 600 * 601 * @return A node representation of the DTM node. 602 */ 603 public Node getNode(int nodeHandle) 604 { 605 606 int identity = makeNodeIdentity(nodeHandle); 607 608 return m_nodes.get(identity); 609 } 610 611 /** 612 * Get a Node from an identity index. 613 * 614 * NEEDSDOC @param nodeIdentity 615 * 616 * NEEDSDOC ($objectName$) @return 617 */ 618 protected Node lookupNode(int nodeIdentity) 619 { 620 return m_nodes.get(nodeIdentity); 621 } 622 623 /** 624 * Get the next node identity value in the list, and call the iterator 625 * if it hasn't been added yet. 626 * 627 * @param identity The node identity (index). 628 * @return identity+1, or DTM.NULL. 629 */ 630 protected int getNextNodeIdentity(int identity) 631 { 632 633 identity += 1; 634 635 if (identity >= m_nodes.size()) 636 { 637 if (!nextNode()) 638 identity = DTM.NULL; 639 } 640 641 return identity; 642 } 643 644 /** 645 * Get the handle from a Node. 
646 * <p>%OPT% This will be pretty slow.</p> 647 * 648 * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path; 649 * walk down DTM reconstructing path) might be considerably faster 650 * on later nodes in large documents. That might also imply improving 651 * this call to handle nodes which would be in this DTM but 652 * have not yet been built, which might or might not be a Good Thing.</p> 653 * 654 * %REVIEW% This relies on being able to test node-identity via 655 * object-identity. DTM2DOM proxying is a great example of a case where 656 * that doesn't work. DOM Level 3 will provide the isSameNode() method 657 * to fix that, but until then this is going to be flaky. 658 * 659 * @param node A node, which may be null. 660 * 661 * @return The node handle or <code>DTM.NULL</code>. 662 */ 663 private int getHandleFromNode(Node node) 664 { 665 if (null != node) 666 { 667 int len = m_nodes.size(); 668 boolean isMore; 669 int i = 0; 670 do 671 { 672 for (; i < len; i++) 673 { 674 if (m_nodes.get(i) == node) 675 return makeNodeHandle(i); 676 } 677 678 isMore = nextNode(); 679 680 len = m_nodes.size(); 681 682 } 683 while(isMore || i < len); 684 } 685 686 return DTM.NULL; 687 } 688 689 /** Get the handle from a Node. This is a more robust version of 690 * getHandleFromNode, intended to be usable by the public. 691 * 692 * <p>%OPT% This will be pretty slow.</p> 693 * 694 * %REVIEW% This relies on being able to test node-identity via 695 * object-identity. DTM2DOM proxying is a great example of a case where 696 * that doesn't work. DOM Level 3 will provide the isSameNode() method 697 * to fix that, but until then this is going to be flaky. 698 * 699 * @param node A node, which may be null. 700 * 701 * @return The node handle or <code>DTM.NULL</code>. */ 702 public int getHandleOfNode(Node node) 703 { 704 if (null != node) 705 { 706 // Is Node actually within the same document? If not, don't search! 
707 // This would be easier if m_root was always the Document node, but 708 // we decided to allow wrapping a DTM around a subtree. 709 if((m_root==node) || 710 (m_root.getNodeType()==DOCUMENT_NODE && 711 m_root==node.getOwnerDocument()) || 712 (m_root.getNodeType()!=DOCUMENT_NODE && 713 m_root.getOwnerDocument()==node.getOwnerDocument()) 714 ) 715 { 716 // If node _is_ in m_root's tree, find its handle 717 // 718 // %OPT% This check may be improved significantly when DOM 719 // Level 3 nodeKey and relative-order tests become 720 // available! 721 for(Node cursor=node; 722 cursor!=null; 723 cursor= 724 (cursor.getNodeType()!=ATTRIBUTE_NODE) 725 ? cursor.getParentNode() 726 : ((org.w3c.dom.Attr)cursor).getOwnerElement()) 727 { 728 if(cursor==m_root) 729 // We know this node; find its handle. 730 return getHandleFromNode(node); 731 } // for ancestors of node 732 } // if node and m_root in same Document 733 } // if node!=null 734 735 return DTM.NULL; 736 } 737 738 /** 739 * Retrieves an attribute node by by qualified name and namespace URI. 740 * 741 * @param nodeHandle int Handle of the node upon which to look up this attribute.. 742 * @param namespaceURI The namespace URI of the attribute to 743 * retrieve, or null. 744 * @param name The local name of the attribute to 745 * retrieve. 746 * @return The attribute node handle with the specified name ( 747 * <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such 748 * attribute. 749 */ 750 public int getAttributeNode(int nodeHandle, String namespaceURI, 751 String name) 752 { 753 754 // %OPT% This is probably slower than it needs to be. 755 if (null == namespaceURI) 756 namespaceURI = ""; 757 758 int type = getNodeType(nodeHandle); 759 760 if (DTM.ELEMENT_NODE == type) 761 { 762 763 // Assume that attributes immediately follow the element. 764 int identity = makeNodeIdentity(nodeHandle); 765 766 while (DTM.NULL != (identity = getNextNodeIdentity(identity))) 767 { 768 // Assume this can not be null. 
769 type = _type(identity); 770 771 // %REVIEW% 772 // Should namespace nodes be retrievable DOM-style as attrs? 773 // If not we need a separate function... which may be desirable 774 // architecturally, but which is ugly from a code point of view. 775 // (If we REALLY insist on it, this code should become a subroutine 776 // of both -- retrieve the node, then test if the type matches 777 // what you're looking for.) 778 if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE) 779 { 780 Node node = lookupNode(identity); 781 String nodeuri = node.getNamespaceURI(); 782 783 if (null == nodeuri) 784 nodeuri = ""; 785 786 String nodelocalname = node.getLocalName(); 787 788 if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname)) 789 return makeNodeHandle(identity); 790 } 791 792 else // if (DTM.NAMESPACE_NODE != type) 793 { 794 break; 795 } 796 } 797 } 798 799 return DTM.NULL; 800 } 801 802 /** 803 * Get the string-value of a node as a String object 804 * (see http://www.w3.org/TR/xpath#data-model 805 * for the definition of a node's string-value). 806 * 807 * @param nodeHandle The node ID. 808 * 809 * @return A string object that represents the string-value of the given node. 810 */ 811 public XMLString getStringValue(int nodeHandle) 812 { 813 814 int type = getNodeType(nodeHandle); 815 Node node = getNode(nodeHandle); 816 // %TBD% If an element only has one text node, we should just use it 817 // directly. 818 if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type 819 || DTM.DOCUMENT_FRAGMENT_NODE == type) 820 { 821 FastStringBuffer buf = StringBufferPool.get(); 822 String s; 823 824 try 825 { 826 getNodeData(node, buf); 827 828 s = (buf.length() > 0) ? 
buf.toString() : ""; 829 } 830 finally 831 { 832 StringBufferPool.free(buf); 833 } 834 835 return m_xstrf.newstr( s ); 836 } 837 else if(TEXT_NODE == type || CDATA_SECTION_NODE == type) 838 { 839 // If this is a DTM text node, it may be made of multiple DOM text 840 // nodes -- including navigating into Entity References. DOM2DTM 841 // records the first node in the sequence and requires that we 842 // pick up the others when we retrieve the DTM node's value. 843 // 844 // %REVIEW% DOM Level 3 is expected to add a "whole text" 845 // retrieval method which performs this function for us. 846 FastStringBuffer buf = StringBufferPool.get(); 847 while(node!=null) 848 { 849 buf.append(node.getNodeValue()); 850 node=logicalNextDOMTextNode(node); 851 } 852 String s=(buf.length() > 0) ? buf.toString() : ""; 853 StringBufferPool.free(buf); 854 return m_xstrf.newstr( s ); 855 } 856 else 857 return m_xstrf.newstr( node.getNodeValue() ); 858 } 859 860 /** 861 * Determine if the string-value of a node is whitespace 862 * 863 * @param nodeHandle The node Handle. 864 * 865 * @return Return true if the given node is whitespace. 866 */ 867 public boolean isWhitespace(int nodeHandle) 868 { 869 int type = getNodeType(nodeHandle); 870 Node node = getNode(nodeHandle); 871 if(TEXT_NODE == type || CDATA_SECTION_NODE == type) 872 { 873 // If this is a DTM text node, it may be made of multiple DOM text 874 // nodes -- including navigating into Entity References. DOM2DTM 875 // records the first node in the sequence and requires that we 876 // pick up the others when we retrieve the DTM node's value. 877 // 878 // %REVIEW% DOM Level 3 is expected to add a "whole text" 879 // retrieval method which performs this function for us. 
880 FastStringBuffer buf = StringBufferPool.get(); 881 while(node!=null) 882 { 883 buf.append(node.getNodeValue()); 884 node=logicalNextDOMTextNode(node); 885 } 886 boolean b = buf.isWhitespace(0, buf.length()); 887 StringBufferPool.free(buf); 888 return b; 889 } 890 return false; 891 } 892 893 /** 894 * Retrieve the text content of a DOM subtree, appending it into a 895 * user-supplied FastStringBuffer object. Note that attributes are 896 * not considered part of the content of an element. 897 * <p> 898 * There are open questions regarding whitespace stripping. 899 * Currently we make no special effort in that regard, since the standard 900 * DOM doesn't yet provide DTD-based information to distinguish 901 * whitespace-in-element-context from genuine #PCDATA. Note that we 902 * should probably also consider xml:space if/when we address this. 903 * DOM Level 3 may solve the problem for us. 904 * <p> 905 * %REVIEW% Actually, since this method operates on the DOM side of the 906 * fence rather than the DTM side, it SHOULDN'T do 907 * any special handling. The DOM does what the DOM does; if you want 908 * DTM-level abstractions, use DTM-level methods. 909 * 910 * @param node Node whose subtree is to be walked, gathering the 911 * contents of all Text or CDATASection nodes. 912 * @param buf FastStringBuffer into which the contents of the text 913 * nodes are to be concatenated. 
914 */ 915 protected static void getNodeData(Node node, FastStringBuffer buf) 916 { 917 918 switch (node.getNodeType()) 919 { 920 case Node.DOCUMENT_FRAGMENT_NODE : 921 case Node.DOCUMENT_NODE : 922 case Node.ELEMENT_NODE : 923 { 924 for (Node child = node.getFirstChild(); null != child; 925 child = child.getNextSibling()) 926 { 927 getNodeData(child, buf); 928 } 929 } 930 break; 931 case Node.TEXT_NODE : 932 case Node.CDATA_SECTION_NODE : 933 case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node 934 buf.append(node.getNodeValue()); 935 break; 936 case Node.PROCESSING_INSTRUCTION_NODE : 937 // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); 938 break; 939 default : 940 // ignore 941 break; 942 } 943 } 944 945 /** 946 * Given a node handle, return its DOM-style node name. This will 947 * include names such as #text or #document. 948 * 949 * @param nodeHandle the id of the node. 950 * @return String Name of this node, which may be an empty string. 951 * %REVIEW% Document when empty string is possible... 952 * %REVIEW-COMMENT% It should never be empty, should it? 953 */ 954 public String getNodeName(int nodeHandle) 955 { 956 957 Node node = getNode(nodeHandle); 958 959 // Assume non-null. 960 return node.getNodeName(); 961 } 962 963 /** 964 * Given a node handle, return the XPath node name. This should be 965 * the name as described by the XPath data model, NOT the DOM-style 966 * name. 967 * 968 * @param nodeHandle the id of the node. 969 * @return String Name of this node, which may be an empty string. 970 */ 971 public String getNodeNameX(int nodeHandle) 972 { 973 974 String name; 975 short type = getNodeType(nodeHandle); 976 977 switch (type) 978 { 979 case DTM.NAMESPACE_NODE : 980 { 981 Node node = getNode(nodeHandle); 982 983 // assume not null. 
984 name = node.getNodeName(); 985 if(name.startsWith("xmlns:")) 986 { 987 name = QName.getLocalPart(name); 988 } 989 else if(name.equals("xmlns")) 990 { 991 name = ""; 992 } 993 } 994 break; 995 case DTM.ATTRIBUTE_NODE : 996 case DTM.ELEMENT_NODE : 997 case DTM.ENTITY_REFERENCE_NODE : 998 case DTM.PROCESSING_INSTRUCTION_NODE : 999 { 1000 Node node = getNode(nodeHandle); 1001 1002 // assume not null. 1003 name = node.getNodeName(); 1004 } 1005 break; 1006 default : 1007 name = ""; 1008 } 1009 1010 return name; 1011 } 1012 1013 /** 1014 * Given a node handle, return its XPath-style localname. 1015 * (As defined in Namespaces, this is the portion of the name after any 1016 * colon character). 1017 * 1018 * @param nodeHandle the id of the node. 1019 * @return String Local name of this node. 1020 */ 1021 public String getLocalName(int nodeHandle) 1022 { 1023 if(JJK_NEWCODE) 1024 { 1025 int id=makeNodeIdentity(nodeHandle); 1026 if(NULL==id) return null; 1027 Node newnode=m_nodes.get(id); 1028 String newname=newnode.getLocalName(); 1029 if (null == newname) 1030 { 1031 // XSLT treats PIs, and possibly other things, as having QNames. 1032 String qname = newnode.getNodeName(); 1033 if('#'==qname.charAt(0)) 1034 { 1035 // Match old default for this function 1036 // This conversion may or may not be necessary 1037 newname=""; 1038 } 1039 else 1040 { 1041 int index = qname.indexOf(':'); 1042 newname = (index < 0) ? qname : qname.substring(index + 1); 1043 } 1044 } 1045 return newname; 1046 } 1047 else 1048 { 1049 String name; 1050 short type = getNodeType(nodeHandle); 1051 switch (type) 1052 { 1053 case DTM.ATTRIBUTE_NODE : 1054 case DTM.ELEMENT_NODE : 1055 case DTM.ENTITY_REFERENCE_NODE : 1056 case DTM.NAMESPACE_NODE : 1057 case DTM.PROCESSING_INSTRUCTION_NODE : 1058 { 1059 Node node = getNode(nodeHandle); 1060 1061 // assume not null. 
1062 name = node.getLocalName(); 1063 1064 if (null == name) 1065 { 1066 String qname = node.getNodeName(); 1067 int index = qname.indexOf(':'); 1068 1069 name = (index < 0) ? qname : qname.substring(index + 1); 1070 } 1071 } 1072 break; 1073 default : 1074 name = ""; 1075 } 1076 return name; 1077 } 1078 } 1079 1080 /** 1081 * Given a namespace handle, return the prefix that the namespace decl is 1082 * mapping. 1083 * Given a node handle, return the prefix used to map to the namespace. 1084 * 1085 * <p> %REVIEW% Are you sure you want "" for no prefix? </p> 1086 * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb </p> 1087 * 1088 * @param nodeHandle the id of the node. 1089 * @return String prefix of this node's name, or "" if no explicit 1090 * namespace prefix was given. 1091 */ 1092 public String getPrefix(int nodeHandle) 1093 { 1094 1095 String prefix; 1096 short type = getNodeType(nodeHandle); 1097 1098 switch (type) 1099 { 1100 case DTM.NAMESPACE_NODE : 1101 { 1102 Node node = getNode(nodeHandle); 1103 1104 // assume not null. 1105 String qname = node.getNodeName(); 1106 int index = qname.indexOf(':'); 1107 1108 prefix = (index < 0) ? "" : qname.substring(index + 1); 1109 } 1110 break; 1111 case DTM.ATTRIBUTE_NODE : 1112 case DTM.ELEMENT_NODE : 1113 { 1114 Node node = getNode(nodeHandle); 1115 1116 // assume not null. 1117 String qname = node.getNodeName(); 1118 int index = qname.indexOf(':'); 1119 1120 prefix = (index < 0) ? "" : qname.substring(0, index); 1121 } 1122 break; 1123 default : 1124 prefix = ""; 1125 } 1126 1127 return prefix; 1128 } 1129 1130 /** 1131 * Given a node handle, return its DOM-style namespace URI 1132 * (As defined in Namespaces, this is the declared URI which this node's 1133 * prefix -- or default in lieu thereof -- was mapped to.) 1134 * 1135 * <p>%REVIEW% Null or ""? -sb</p> 1136 * 1137 * @param nodeHandle the id of the node. 
1138 * @return String URI value of this node's namespace, or null if no 1139 * namespace was resolved. 1140 */ 1141 public String getNamespaceURI(int nodeHandle) 1142 { 1143 if(JJK_NEWCODE) 1144 { 1145 int id=makeNodeIdentity(nodeHandle); 1146 if(id==NULL) return null; 1147 Node node=m_nodes.get(id); 1148 return node.getNamespaceURI(); 1149 } 1150 else 1151 { 1152 String nsuri; 1153 short type = getNodeType(nodeHandle); 1154 1155 switch (type) 1156 { 1157 case DTM.ATTRIBUTE_NODE : 1158 case DTM.ELEMENT_NODE : 1159 case DTM.ENTITY_REFERENCE_NODE : 1160 case DTM.NAMESPACE_NODE : 1161 case DTM.PROCESSING_INSTRUCTION_NODE : 1162 { 1163 Node node = getNode(nodeHandle); 1164 1165 // assume not null. 1166 nsuri = node.getNamespaceURI(); 1167 1168 // %TBD% Handle DOM1? 1169 } 1170 break; 1171 default : 1172 nsuri = null; 1173 } 1174 1175 return nsuri; 1176 } 1177 1178 } 1179 1180 /** Utility function: Given a DOM Text node, determine whether it is 1181 * logically followed by another Text or CDATASection node. This may 1182 * involve traversing into Entity References. 1183 * 1184 * %REVIEW% DOM Level 3 is expected to add functionality which may 1185 * allow us to retire this. 1186 */ 1187 private Node logicalNextDOMTextNode(Node n) 1188 { 1189 Node p=n.getNextSibling(); 1190 if(p==null) 1191 { 1192 // Walk out of any EntityReferenceNodes that ended with text 1193 for(n=n.getParentNode(); 1194 n!=null && ENTITY_REFERENCE_NODE == n.getNodeType(); 1195 n=n.getParentNode()) 1196 { 1197 p=n.getNextSibling(); 1198 if(p!=null) 1199 break; 1200 } 1201 } 1202 n=p; 1203 while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType()) 1204 { 1205 // Walk into any EntityReferenceNodes that start with text 1206 if(n.hasChildNodes()) 1207 n=n.getFirstChild(); 1208 else 1209 n=n.getNextSibling(); 1210 } 1211 if(n!=null) 1212 { 1213 // Found a logical next sibling. Is it text? 
1214 int ntype=n.getNodeType(); 1215 if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype) 1216 n=null; 1217 } 1218 return n; 1219 } 1220 1221 /** 1222 * Given a node handle, return its node value. This is mostly 1223 * as defined by the DOM, but may ignore some conveniences. 1224 * <p> 1225 * 1226 * @param nodeHandle The node id. 1227 * @return String Value of this node, or null if not 1228 * meaningful for this node type. 1229 */ 1230 public String getNodeValue(int nodeHandle) 1231 { 1232 // The _type(nodeHandle) call was taking the lion's share of our 1233 // time, and was wrong anyway since it wasn't coverting handle to 1234 // identity. Inlined it. 1235 int type = _exptype(makeNodeIdentity(nodeHandle)); 1236 type=(NULL != type) ? getNodeType(nodeHandle) : NULL; 1237 1238 if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type) 1239 return getNode(nodeHandle).getNodeValue(); 1240 1241 // If this is a DTM text node, it may be made of multiple DOM text 1242 // nodes -- including navigating into Entity References. DOM2DTM 1243 // records the first node in the sequence and requires that we 1244 // pick up the others when we retrieve the DTM node's value. 1245 // 1246 // %REVIEW% DOM Level 3 is expected to add a "whole text" 1247 // retrieval method which performs this function for us. 1248 Node node = getNode(nodeHandle); 1249 Node n=logicalNextDOMTextNode(node); 1250 if(n==null) 1251 return node.getNodeValue(); 1252 1253 FastStringBuffer buf = StringBufferPool.get(); 1254 buf.append(node.getNodeValue()); 1255 while(n!=null) 1256 { 1257 buf.append(n.getNodeValue()); 1258 n=logicalNextDOMTextNode(n); 1259 } 1260 String s = (buf.length() > 0) ? buf.toString() : ""; 1261 StringBufferPool.free(buf); 1262 return s; 1263 } 1264 1265 /** 1266 * A document type declaration information item has the following properties: 1267 * 1268 * 1. [system identifier] The system identifier of the external subset, if 1269 * it exists. Otherwise this property has no value. 
1270 * 1271 * @return the system identifier String object, or null if there is none. 1272 */ 1273 public String getDocumentTypeDeclarationSystemIdentifier() 1274 { 1275 1276 Document doc; 1277 1278 if (m_root.getNodeType() == Node.DOCUMENT_NODE) 1279 doc = (Document) m_root; 1280 else 1281 doc = m_root.getOwnerDocument(); 1282 1283 if (null != doc) 1284 { 1285 DocumentType dtd = doc.getDoctype(); 1286 1287 if (null != dtd) 1288 { 1289 return dtd.getSystemId(); 1290 } 1291 } 1292 1293 return null; 1294 } 1295 1296 /** 1297 * Return the public identifier of the external subset, 1298 * normalized as described in 4.2.2 External Entities [XML]. If there is 1299 * no external subset or if it has no public identifier, this property 1300 * has no value. 1301 * 1302 * @return the public identifier String object, or null if there is none. 1303 */ 1304 public String getDocumentTypeDeclarationPublicIdentifier() 1305 { 1306 1307 Document doc; 1308 1309 if (m_root.getNodeType() == Node.DOCUMENT_NODE) 1310 doc = (Document) m_root; 1311 else 1312 doc = m_root.getOwnerDocument(); 1313 1314 if (null != doc) 1315 { 1316 DocumentType dtd = doc.getDoctype(); 1317 1318 if (null != dtd) 1319 { 1320 return dtd.getPublicId(); 1321 } 1322 } 1323 1324 return null; 1325 } 1326 1327 /** 1328 * Returns the <code>Element</code> whose <code>ID</code> is given by 1329 * <code>elementId</code>. If no such element exists, returns 1330 * <code>DTM.NULL</code>. Behavior is not defined if more than one element 1331 * has this <code>ID</code>. Attributes (including those 1332 * with the name "ID") are not of type ID unless so defined by DTD/Schema 1333 * information available to the DTM implementation. 1334 * Implementations that do not know whether attributes are of type ID or 1335 * not are expected to return <code>DTM.NULL</code>. 1336 * 1337 * <p>%REVIEW% Presumably IDs are still scoped to a single document, 1338 * and this operation searches only within a single document, right? 
1339 * Wouldn't want collisions between DTMs in the same process.</p> 1340 * 1341 * @param elementId The unique <code>id</code> value for an element. 1342 * @return The handle of the matching element. 1343 */ 1344 public int getElementById(String elementId) 1345 { 1346 1347 Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) 1348 ? (Document) m_root : m_root.getOwnerDocument(); 1349 1350 if(null != doc) 1351 { 1352 Node elem = doc.getElementById(elementId); 1353 if(null != elem) 1354 { 1355 int elemHandle = getHandleFromNode(elem); 1356 1357 if(DTM.NULL == elemHandle) 1358 { 1359 int identity = m_nodes.size()-1; 1360 while (DTM.NULL != (identity = getNextNodeIdentity(identity))) 1361 { 1362 Node node = getNode(identity); 1363 if(node == elem) 1364 { 1365 elemHandle = getHandleFromNode(elem); 1366 break; 1367 } 1368 } 1369 } 1370 1371 return elemHandle; 1372 } 1373 1374 } 1375 return DTM.NULL; 1376 } 1377 1378 /** 1379 * The getUnparsedEntityURI function returns the URI of the unparsed 1380 * entity with the specified name in the same document as the context 1381 * node (see [3.3 Unparsed Entities]). It returns the empty string if 1382 * there is no such entity. 1383 * <p> 1384 * XML processors may choose to use the System Identifier (if one 1385 * is provided) to resolve the entity, rather than the URI in the 1386 * Public Identifier. The details are dependent on the processor, and 1387 * we would have to support some form of plug-in resolver to handle 1388 * this properly. Currently, we simply return the System Identifier if 1389 * present, and hope that it a usable URI or that our caller can 1390 * map it to one. 1391 * TODO: Resolve Public Identifiers... or consider changing function name. 1392 * <p> 1393 * If we find a relative URI 1394 * reference, XML expects it to be resolved in terms of the base URI 1395 * of the document. 
The DOM doesn't do that for us, and it isn't 1396 * entirely clear whether that should be done here; currently that's 1397 * pushed up to a higher level of our application. (Note that DOM Level 1398 * 1 didn't store the document's base URI.) 1399 * TODO: Consider resolving Relative URIs. 1400 * <p> 1401 * (The DOM's statement that "An XML processor may choose to 1402 * completely expand entities before the structure model is passed 1403 * to the DOM" refers only to parsed entities, not unparsed, and hence 1404 * doesn't affect this function.) 1405 * 1406 * @param name A string containing the Entity Name of the unparsed 1407 * entity. 1408 * 1409 * @return String containing the URI of the Unparsed Entity, or an 1410 * empty string if no such entity exists. 1411 */ 1412 public String getUnparsedEntityURI(String name) 1413 { 1414 1415 String url = ""; 1416 Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) 1417 ? (Document) m_root : m_root.getOwnerDocument(); 1418 1419 if (null != doc) 1420 { 1421 DocumentType doctype = doc.getDoctype(); 1422 1423 if (null != doctype) 1424 { 1425 NamedNodeMap entities = doctype.getEntities(); 1426 if(null == entities) 1427 return url; 1428 Entity entity = (Entity) entities.getNamedItem(name); 1429 if(null == entity) 1430 return url; 1431 1432 String notationName = entity.getNotationName(); 1433 1434 if (null != notationName) // then it's unparsed 1435 { 1436 // The draft says: "The XSLT processor may use the public 1437 // identifier to generate a URI for the entity instead of the URI 1438 // specified in the system identifier. If the XSLT processor does 1439 // not use the public identifier to generate the URI, it must use 1440 // the system identifier; if the system identifier is a relative 1441 // URI, it must be resolved into an absolute URI using the URI of 1442 // the resource containing the entity declaration as the base 1443 // URI [RFC2396]." 1444 // So I'm falling a bit short here. 
1445 url = entity.getSystemId(); 1446 1447 if (null == url) 1448 { 1449 url = entity.getPublicId(); 1450 } 1451 else 1452 { 1453 // This should be resolved to an absolute URL, but that's hard 1454 // to do from here. 1455 } 1456 } 1457 } 1458 } 1459 1460 return url; 1461 } 1462 1463 /** 1464 * 5. [specified] A flag indicating whether this attribute was actually 1465 * specified in the start-tag of its element, or was defaulted from the 1466 * DTD. 1467 * 1468 * @param attributeHandle the attribute handle 1469 * @return <code>true</code> if the attribute was specified; 1470 * <code>false</code> if it was defaulted. 1471 */ 1472 public boolean isAttributeSpecified(int attributeHandle) 1473 { 1474 int type = getNodeType(attributeHandle); 1475 1476 if (DTM.ATTRIBUTE_NODE == type) 1477 { 1478 Attr attr = (Attr)getNode(attributeHandle); 1479 return attr.getSpecified(); 1480 } 1481 return false; 1482 } 1483 1484 /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since 1485 * we're wrapped around an existing DOM. 1486 * 1487 * @param source The IncrementalSAXSource that we want to recieve events from 1488 * on demand. 1489 */ 1490 public void setIncrementalSAXSource(IncrementalSAXSource source) 1491 { 1492 } 1493 1494 /** getContentHandler returns "our SAX builder" -- the thing that 1495 * someone else should send SAX events to in order to extend this 1496 * DTM model. 1497 * 1498 * @return null if this model doesn't respond to SAX events, 1499 * "this" if the DTM object has a built-in SAX ContentHandler, 1500 * the IncrmentalSAXSource if we're bound to one and should receive 1501 * the SAX stream via it for incremental build purposes... 1502 * */ 1503 public org.xml.sax.ContentHandler getContentHandler() 1504 { 1505 return null; 1506 } 1507 1508 /** 1509 * Return this DTM's lexical handler. 1510 * 1511 * %REVIEW% Should this return null if constrution already done/begun? 
1512 * 1513 * @return null if this model doesn't respond to lexical SAX events, 1514 * "this" if the DTM object has a built-in SAX ContentHandler, 1515 * the IncrementalSAXSource if we're bound to one and should receive 1516 * the SAX stream via it for incremental build purposes... 1517 */ 1518 public org.xml.sax.ext.LexicalHandler getLexicalHandler() 1519 { 1520 1521 return null; 1522 } 1523 1524 1525 /** 1526 * Return this DTM's EntityResolver. 1527 * 1528 * @return null if this model doesn't respond to SAX entity ref events. 1529 */ 1530 public org.xml.sax.EntityResolver getEntityResolver() 1531 { 1532 1533 return null; 1534 } 1535 1536 /** 1537 * Return this DTM's DTDHandler. 1538 * 1539 * @return null if this model doesn't respond to SAX dtd events. 1540 */ 1541 public org.xml.sax.DTDHandler getDTDHandler() 1542 { 1543 1544 return null; 1545 } 1546 1547 /** 1548 * Return this DTM's ErrorHandler. 1549 * 1550 * @return null if this model doesn't respond to SAX error events. 1551 */ 1552 public org.xml.sax.ErrorHandler getErrorHandler() 1553 { 1554 1555 return null; 1556 } 1557 1558 /** 1559 * Return this DTM's DeclHandler. 1560 * 1561 * @return null if this model doesn't respond to SAX Decl events. 1562 */ 1563 public org.xml.sax.ext.DeclHandler getDeclHandler() 1564 { 1565 1566 return null; 1567 } 1568 1569 /** @return true iff we're building this model incrementally (eg 1570 * we're partnered with a IncrementalSAXSource) and thus require that the 1571 * transformation and the parse run simultaneously. Guidance to the 1572 * DTMManager. 1573 * */ 1574 public boolean needsTwoThreads() 1575 { 1576 return false; 1577 } 1578 1579 // ========== Direct SAX Dispatch, for optimization purposes ======== 1580 1581 /** 1582 * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition 1583 * of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S"> 1584 * the definition of <CODE>S</CODE></A> for details. 
1585 * @param ch Character to check as XML whitespace. 1586 * @return =true if <var>ch</var> is XML whitespace; otherwise =false. 1587 */ 1588 private static boolean isSpace(char ch) 1589 { 1590 return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now. 1591 } 1592 1593 /** 1594 * Directly call the 1595 * characters method on the passed ContentHandler for the 1596 * string-value of the given node (see http://www.w3.org/TR/xpath#data-model 1597 * for the definition of a node's string-value). Multiple calls to the 1598 * ContentHandler's characters methods may well occur for a single call to 1599 * this method. 1600 * 1601 * @param nodeHandle The node ID. 1602 * @param ch A non-null reference to a ContentHandler. 1603 * 1604 * @throws org.xml.sax.SAXException 1605 */ 1606 public void dispatchCharactersEvents( 1607 int nodeHandle, org.xml.sax.ContentHandler ch, 1608 boolean normalize) 1609 throws org.xml.sax.SAXException 1610 { 1611 if(normalize) 1612 { 1613 XMLString str = getStringValue(nodeHandle); 1614 str = str.fixWhiteSpace(true, true, false); 1615 str.dispatchCharactersEvents(ch); 1616 } 1617 else 1618 { 1619 int type = getNodeType(nodeHandle); 1620 Node node = getNode(nodeHandle); 1621 dispatchNodeData(node, ch, 0); 1622 // Text coalition -- a DTM text node may represent multiple 1623 // DOM nodes. 1624 if(TEXT_NODE == type || CDATA_SECTION_NODE == type) 1625 { 1626 while( null != (node=logicalNextDOMTextNode(node)) ) 1627 { 1628 dispatchNodeData(node, ch, 0); 1629 } 1630 } 1631 } 1632 } 1633 1634 /** 1635 * Retrieve the text content of a DOM subtree, appending it into a 1636 * user-supplied FastStringBuffer object. Note that attributes are 1637 * not considered part of the content of an element. 1638 * <p> 1639 * There are open questions regarding whitespace stripping. 
1640 * Currently we make no special effort in that regard, since the standard 1641 * DOM doesn't yet provide DTD-based information to distinguish 1642 * whitespace-in-element-context from genuine #PCDATA. Note that we 1643 * should probably also consider xml:space if/when we address this. 1644 * DOM Level 3 may solve the problem for us. 1645 * <p> 1646 * %REVIEW% Note that as a DOM-level operation, it can be argued that this 1647 * routine _shouldn't_ perform any processing beyond what the DOM already 1648 * does, and that whitespace stripping and so on belong at the DTM level. 1649 * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM. 1650 * 1651 * @param node Node whose subtree is to be walked, gathering the 1652 * contents of all Text or CDATASection nodes. 1653 */ 1654 @SuppressWarnings("fallthrough") 1655 protected static void dispatchNodeData(Node node, 1656 org.xml.sax.ContentHandler ch, 1657 int depth) 1658 throws org.xml.sax.SAXException 1659 { 1660 1661 switch (node.getNodeType()) 1662 { 1663 case Node.DOCUMENT_FRAGMENT_NODE : 1664 case Node.DOCUMENT_NODE : 1665 case Node.ELEMENT_NODE : 1666 { 1667 for (Node child = node.getFirstChild(); null != child; 1668 child = child.getNextSibling()) 1669 { 1670 dispatchNodeData(child, ch, depth+1); 1671 } 1672 } 1673 break; 1674 case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW% 1675 case Node.COMMENT_NODE : 1676 if(0 != depth) 1677 break; 1678 // NOTE: Because this operation works in the DOM space, it does _not_ attempt 1679 // to perform Text Coalition. That should only be done in DTM space. 
1680 case Node.TEXT_NODE : 1681 case Node.CDATA_SECTION_NODE : 1682 case Node.ATTRIBUTE_NODE : 1683 String str = node.getNodeValue(); 1684 if(ch instanceof CharacterNodeHandler) 1685 { 1686 ((CharacterNodeHandler)ch).characters(node); 1687 } 1688 else 1689 { 1690 ch.characters(str.toCharArray(), 0, str.length()); 1691 } 1692 break; 1693 // /* case Node.PROCESSING_INSTRUCTION_NODE : 1694 // // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); 1695 // break; */ 1696 default : 1697 // ignore 1698 break; 1699 } 1700 } 1701 1702 TreeWalker m_walker = new TreeWalker(null); 1703 1704 /** 1705 * Directly create SAX parser events from a subtree. 1706 * 1707 * @param nodeHandle The node ID. 1708 * @param ch A non-null reference to a ContentHandler. 1709 * 1710 * @throws org.xml.sax.SAXException 1711 */ 1712 public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch) 1713 throws org.xml.sax.SAXException 1714 { 1715 TreeWalker treeWalker = m_walker; 1716 ContentHandler prevCH = treeWalker.getContentHandler(); 1717 1718 if(null != prevCH) 1719 { 1720 treeWalker = new TreeWalker(null); 1721 } 1722 treeWalker.setContentHandler(ch); 1723 1724 try 1725 { 1726 Node node = getNode(nodeHandle); 1727 treeWalker.traverseFragment(node); 1728 } 1729 finally 1730 { 1731 treeWalker.setContentHandler(null); 1732 } 1733 } 1734 1735 public interface CharacterNodeHandler 1736 { 1737 public void characters(Node node) 1738 throws org.xml.sax.SAXException; 1739 } 1740 1741 /** 1742 * For the moment all the run time properties are ignored by this 1743 * class. 1744 * 1745 * @param property a <code>String</code> value 1746 * @param value an <code>Object</code> value 1747 */ 1748 public void setProperty(String property, Object value) 1749 { 1750 } 1751 1752 /** 1753 * No source information is available for DOM2DTM, so return 1754 * <code>null</code> here. 
*
* @param node an <code>int</code> value; it is not examined.
* @return null
*/
  public SourceLocator getSourceLocatorFor(int node)
  {
    return null;
  }

}