1 /* 2 * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xml.internal.serializer.dom3; 22 23 import com.sun.org.apache.xerces.internal.util.XML11Char; 24 import com.sun.org.apache.xerces.internal.util.XMLChar; 25 import com.sun.org.apache.xml.internal.serializer.OutputPropertiesFactory; 26 import com.sun.org.apache.xml.internal.serializer.SerializationHandler; 27 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 28 import com.sun.org.apache.xml.internal.serializer.utils.Utils; 29 import java.io.IOException; 30 import java.io.Writer; 31 import java.util.Collections; 32 import java.util.Enumeration; 33 import java.util.HashMap; 34 import java.util.Map; 35 import java.util.Properties; 36 import org.w3c.dom.Attr; 37 import org.w3c.dom.CDATASection; 38 import org.w3c.dom.Comment; 39 import org.w3c.dom.DOMError; 40 import org.w3c.dom.DOMErrorHandler; 41 import org.w3c.dom.Document; 42 import org.w3c.dom.DocumentType; 43 import org.w3c.dom.Element; 44 import org.w3c.dom.Entity; 45 import org.w3c.dom.EntityReference; 46 import org.w3c.dom.NamedNodeMap; 47 import org.w3c.dom.Node; 48 
import org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
import org.w3c.dom.ls.LSSerializerFilter;
import org.w3c.dom.traversal.NodeFilter;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.LocatorImpl;

/**
 * Built on org.apache.xml.serializer.TreeWalker and adds functionality to
 * traverse and serialize a DOM Node (Level 2 or Level 3) as specified in
 * the DOM Level 3 LS Recommendation by evaluating and applying DOMConfiguration
 * parameters and filters, if any, during serialization.
 *
 * @xsl.usage internal
 * @LastModified: Oct 2017
 */
final class DOM3TreeWalker {

    /**
     * The SerializationHandler that receives the serialization events.
     * It extends ContentHandler and is handed to this class through the
     * constructor.
     */
    private SerializationHandler fSerializer = null;

    /** We do not need DOM2Helper since DOM Level 3 LS applies to DOM Level 2 or newer */

    /** Locator object for this TreeWalker; refreshed for every node in startNode */
    private LocatorImpl fLocator = new LocatorImpl();

    /** ErrorHandler as passed to the constructor; may be null, so users null-check it */
    private DOMErrorHandler fErrorHandler = null;

    /** LSSerializerFilter as passed to the constructor; may be null */
    private LSSerializerFilter fFilter = null;

    /**
     * The serializer viewed as a LexicalHandler. Assigned in traverse() when
     * the serializer is an instance of LexicalHandler; null otherwise.
     */
    private LexicalHandler fLexicalHandler = null;

    /** Value of fFilter.getWhatToShow(), captured in traverse() when a filter is set */
    private int fWhatToShowFilter;

    /** New line sequence to use in serialization */
    private String fNewLine = null;

    /** DOMConfiguration Properties, read from the serializer's output format */
    private Properties fDOMConfigProperties = null;

    /** Keeps track if we are in an entity reference when entities=true */
    private boolean fInEntityRef = false;

    /** Stores the version of the XML document to be serialized */
    private String fXMLVersion = null;

    /** True when the document is XML 1.1; the default (false) means XML 1.0 */
    private boolean fIsXMLVersion11 = false;

    /** Is the Node a Level 3 DOM node; determined at the start of traverse() */
    private boolean fIsLevel3DOM = false;

    /** DOM Configuration Parameters, stored as a bit set of the flag constants below */
    private int fFeatures = 0;

    /** Flag indicating whether following text to be processed is raw text */
    boolean fNextIsRaw = false;

    // Namespace URI used for xmlns declaration attributes
    private static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";

    // Prefix (and attribute name) of namespace declaration attributes
    private static final String XMLNS_PREFIX = "xmlns";

    // Namespace URI of the reserved "xml" prefix
    private static final String XML_URI = "http://www.w3.org/XML/1998/namespace";

    // The reserved "xml" prefix
    private static final String XML_PREFIX = "xml";

    /** stores namespaces in scope */
    protected NamespaceSupport fNSBinder;

    /** stores all namespace bindings on the current element */
    protected NamespaceSupport fLocalNSBinder;

    /** stores the current element depth; maintained by serializeElement */
    private int fElementDepth = 0;

    // ***********************************************************************
    // DOMConfiguration parameter settings
    // Each constant is a single bit that is tested against fFeatures.
    // ***********************************************************************
    // Parameter canonical-form, true [optional] - NOT SUPPORTED
    private final static int CANONICAL = 0x1 << 0;

    // Parameter cdata-sections, true [required] (default)
    private final static int CDATA = 0x1 << 1;

    // Parameter check-character-normalization, true [optional] - NOT SUPPORTED
    private final static int CHARNORMALIZE = 0x1 << 2;

    // Parameter comments, true [required] (default)
    private final static int COMMENTS = 0x1 << 3;

    // Parameter datatype-normalization, true [optional] - NOT SUPPORTED
    private final static int DTNORMALIZE = 0x1 << 4;

    // Parameter element-content-whitespace, true [required] (default) - value - false [optional] NOT SUPPORTED
    private final static int ELEM_CONTENT_WHITESPACE = 0x1 << 5;

    // Parameter entities, true [required] (default)
    private final static int ENTITIES = 0x1 << 6;

    // Parameter infoset, true [required]
(default), false has no effect --> True has no effect for the serializer 162 private final static int INFOSET = 0x1 << 7; 163 164 // Parameter namespaces, true [required] (default) 165 private final static int NAMESPACES = 0x1 << 8; 166 167 // Parameter namespace-declarations, true [required] (default) 168 private final static int NAMESPACEDECLS = 0x1 << 9; 169 170 // Parameter normalize-characters, true [optional] - NOT SUPPORTED 171 private final static int NORMALIZECHARS = 0x1 << 10; 172 173 // Parameter split-cdata-sections, true [required] (default) 174 private final static int SPLITCDATA = 0x1 << 11; 175 176 // Parameter validate, true [optional] - NOT SUPPORTED 177 private final static int VALIDATE = 0x1 << 12; 178 179 // Parameter validate-if-schema, true [optional] - NOT SUPPORTED 180 private final static int SCHEMAVALIDATE = 0x1 << 13; 181 182 // Parameter split-cdata-sections, true [required] (default) 183 private final static int WELLFORMED = 0x1 << 14; 184 185 // Parameter discard-default-content, true [required] (default) 186 // Not sure how this will be used in level 2 Documents 187 private final static int DISCARDDEFAULT = 0x1 << 15; 188 189 // Parameter format-pretty-print, true [optional] 190 private final static int PRETTY_PRINT = 0x1 << 16; 191 192 // Parameter ignore-unknown-character-denormalizations, true [required] (default) 193 // We currently do not support XML 1.1 character normalization 194 private final static int IGNORE_CHAR_DENORMALIZE = 0x1 << 17; 195 196 // Parameter discard-default-content, true [required] (default) 197 private final static int XMLDECL = 0x1 << 18; 198 199 /** 200 * Constructor. 201 * @param contentHandler serialHandler The implemention of the SerializationHandler interface 202 */ 203 DOM3TreeWalker( 204 SerializationHandler serialHandler, 205 DOMErrorHandler errHandler, 206 LSSerializerFilter filter, 207 String newLine) { 208 fSerializer = serialHandler; 209 //fErrorHandler = errHandler == null ? 
new DOMErrorHandlerImpl() : errHandler; // Should we be using the default? 210 fErrorHandler = errHandler; 211 fFilter = filter; 212 fLexicalHandler = null; 213 fNewLine = newLine; 214 215 fNSBinder = new NamespaceSupport(); 216 fLocalNSBinder = new NamespaceSupport(); 217 218 fDOMConfigProperties = fSerializer.getOutputFormat(); 219 fSerializer.setDocumentLocator(fLocator); 220 initProperties(fDOMConfigProperties); 221 } 222 223 /** 224 * Perform a pre-order traversal non-recursive style. 225 * 226 * Note that TreeWalker assumes that the subtree is intended to represent 227 * a complete (though not necessarily well-formed) document and, during a 228 * traversal, startDocument and endDocument will always be issued to the 229 * SAX listener. 230 * 231 * @param pos Node in the tree where to start traversal 232 * 233 * @throws TransformerException 234 */ 235 public void traverse(Node pos) throws org.xml.sax.SAXException { 236 this.fSerializer.startDocument(); 237 238 // Determine if the Node is a DOM Level 3 Core Node. 
239 if (pos.getNodeType() != Node.DOCUMENT_NODE) { 240 Document ownerDoc = pos.getOwnerDocument(); 241 if (ownerDoc != null 242 && ownerDoc.getImplementation().hasFeature("Core", "3.0")) { 243 fIsLevel3DOM = true; 244 } 245 } else { 246 if (((Document) pos) 247 .getImplementation() 248 .hasFeature("Core", "3.0")) { 249 fIsLevel3DOM = true; 250 } 251 } 252 253 if (fSerializer instanceof LexicalHandler) { 254 fLexicalHandler = ((LexicalHandler) this.fSerializer); 255 } 256 257 if (fFilter != null) 258 fWhatToShowFilter = fFilter.getWhatToShow(); 259 260 Node top = pos; 261 262 while (null != pos) { 263 startNode(pos); 264 265 Node nextNode = null; 266 267 nextNode = pos.getFirstChild(); 268 269 while (null == nextNode) { 270 endNode(pos); 271 272 if (top.equals(pos)) 273 break; 274 275 nextNode = pos.getNextSibling(); 276 277 if (null == nextNode) { 278 pos = pos.getParentNode(); 279 280 if ((null == pos) || (top.equals(pos))) { 281 if (null != pos) 282 endNode(pos); 283 284 nextNode = null; 285 286 break; 287 } 288 } 289 } 290 291 pos = nextNode; 292 } 293 this.fSerializer.endDocument(); 294 } 295 296 /** 297 * Perform a pre-order traversal non-recursive style. 298 299 * Note that TreeWalker assumes that the subtree is intended to represent 300 * a complete (though not necessarily well-formed) document and, during a 301 * traversal, startDocument and endDocument will always be issued to the 302 * SAX listener. 303 * 304 * @param pos Node in the tree where to start traversal 305 * @param top Node in the tree where to end traversal 306 * 307 * @throws TransformerException 308 */ 309 public void traverse(Node pos, Node top) throws org.xml.sax.SAXException { 310 311 this.fSerializer.startDocument(); 312 313 // Determine if the Node is a DOM Level 3 Core Node. 
314 if (pos.getNodeType() != Node.DOCUMENT_NODE) { 315 Document ownerDoc = pos.getOwnerDocument(); 316 if (ownerDoc != null 317 && ownerDoc.getImplementation().hasFeature("Core", "3.0")) { 318 fIsLevel3DOM = true; 319 } 320 } else { 321 if (((Document) pos) 322 .getImplementation() 323 .hasFeature("Core", "3.0")) { 324 fIsLevel3DOM = true; 325 } 326 } 327 328 if (fSerializer instanceof LexicalHandler) { 329 fLexicalHandler = ((LexicalHandler) this.fSerializer); 330 } 331 332 if (fFilter != null) 333 fWhatToShowFilter = fFilter.getWhatToShow(); 334 335 while (null != pos) { 336 startNode(pos); 337 338 Node nextNode = null; 339 340 nextNode = pos.getFirstChild(); 341 342 while (null == nextNode) { 343 endNode(pos); 344 345 if ((null != top) && top.equals(pos)) 346 break; 347 348 nextNode = pos.getNextSibling(); 349 350 if (null == nextNode) { 351 pos = pos.getParentNode(); 352 353 if ((null == pos) || ((null != top) && top.equals(pos))) { 354 nextNode = null; 355 356 break; 357 } 358 } 359 } 360 361 pos = nextNode; 362 } 363 this.fSerializer.endDocument(); 364 } 365 366 /** 367 * Optimized dispatch of characters. 
368 */ 369 private final void dispatachChars(Node node) 370 throws org.xml.sax.SAXException { 371 if (fSerializer != null) { 372 String data = ((Text) node).getData(); 373 this.fSerializer.characters(data.toCharArray(), 0, data.length()); 374 } 375 } 376 377 /** 378 * Start processing given node 379 * 380 * @param node Node to process 381 * 382 * @throws org.xml.sax.SAXException 383 */ 384 protected void startNode(Node node) throws org.xml.sax.SAXException { 385 if (node instanceof Locator) { 386 Locator loc = (Locator) node; 387 fLocator.setColumnNumber(loc.getColumnNumber()); 388 fLocator.setLineNumber(loc.getLineNumber()); 389 fLocator.setPublicId(loc.getPublicId()); 390 fLocator.setSystemId(loc.getSystemId()); 391 } else { 392 fLocator.setColumnNumber(0); 393 fLocator.setLineNumber(0); 394 } 395 396 switch (node.getNodeType()) { 397 case Node.DOCUMENT_TYPE_NODE : 398 serializeDocType((DocumentType) node, true); 399 break; 400 case Node.COMMENT_NODE : 401 serializeComment((Comment) node); 402 break; 403 case Node.DOCUMENT_FRAGMENT_NODE : 404 // Children are traversed 405 break; 406 case Node.DOCUMENT_NODE : 407 break; 408 case Node.ELEMENT_NODE : 409 serializeElement((Element) node, true); 410 break; 411 case Node.PROCESSING_INSTRUCTION_NODE : 412 serializePI((ProcessingInstruction) node); 413 break; 414 case Node.CDATA_SECTION_NODE : 415 serializeCDATASection((CDATASection) node); 416 break; 417 case Node.TEXT_NODE : 418 serializeText((Text) node); 419 break; 420 case Node.ENTITY_REFERENCE_NODE : 421 serializeEntityReference((EntityReference) node, true); 422 break; 423 default : 424 } 425 } 426 427 /** 428 * End processing of given node 429 * 430 * 431 * @param node Node we just finished processing 432 * 433 * @throws org.xml.sax.SAXException 434 */ 435 protected void endNode(Node node) throws org.xml.sax.SAXException { 436 437 switch (node.getNodeType()) { 438 case Node.DOCUMENT_NODE : 439 break; 440 case Node.DOCUMENT_TYPE_NODE : 441 
serializeDocType((DocumentType) node, false); 442 break; 443 case Node.ELEMENT_NODE : 444 serializeElement((Element) node, false); 445 break; 446 case Node.CDATA_SECTION_NODE : 447 break; 448 case Node.ENTITY_REFERENCE_NODE : 449 serializeEntityReference((EntityReference) node, false); 450 break; 451 default : 452 } 453 } 454 455 // *********************************************************************** 456 // Node serialization methods 457 // *********************************************************************** 458 /** 459 * Applies a filter on the node to serialize 460 * 461 * @param node The Node to serialize 462 * @return True if the node is to be serialized else false if the node 463 * is to be rejected or skipped. 464 */ 465 protected boolean applyFilter(Node node, int nodeType) { 466 if (fFilter != null && (fWhatToShowFilter & nodeType) != 0) { 467 468 short code = fFilter.acceptNode(node); 469 switch (code) { 470 case NodeFilter.FILTER_REJECT : 471 case NodeFilter.FILTER_SKIP : 472 return false; // skip the node 473 default : // fall through.. 474 } 475 } 476 return true; 477 } 478 479 /** 480 * Serializes a Document Type Node. 481 * 482 * @param node The Docuemnt Type Node to serialize 483 * @param bStart Invoked at the start or end of node. Default true. 484 */ 485 protected void serializeDocType(DocumentType node, boolean bStart) 486 throws SAXException { 487 // The DocType and internalSubset can not be modified in DOM and is 488 // considered to be well-formed as the outcome of successful parsing. 
489 String docTypeName = node.getNodeName(); 490 String publicId = node.getPublicId(); 491 String systemId = node.getSystemId(); 492 String internalSubset = node.getInternalSubset(); 493 494 //DocumentType nodes are never passed to the filter 495 496 if (internalSubset != null && !"".equals(internalSubset)) { 497 498 if (bStart) { 499 try { 500 // The Serializer does not provide a way to write out the 501 // DOCTYPE internal subset via an event call, so we write it 502 // out here. 503 Writer writer = fSerializer.getWriter(); 504 StringBuffer dtd = new StringBuffer(); 505 506 dtd.append("<!DOCTYPE "); 507 dtd.append(docTypeName); 508 if (null != publicId) { 509 dtd.append(" PUBLIC \""); 510 dtd.append(publicId); 511 dtd.append('\"'); 512 } 513 514 if (null != systemId) { 515 if (null == publicId) { 516 dtd.append(" SYSTEM \""); 517 } else { 518 dtd.append(" \""); 519 } 520 dtd.append(systemId); 521 dtd.append('\"'); 522 } 523 524 dtd.append(" [ "); 525 526 dtd.append(fNewLine); 527 dtd.append(internalSubset); 528 dtd.append("]>"); 529 dtd.append(fNewLine); 530 531 writer.write(dtd.toString()); 532 writer.flush(); 533 534 } catch (IOException e) { 535 throw new SAXException(Utils.messages.createMessage( 536 MsgKey.ER_WRITING_INTERNAL_SUBSET, null), e); 537 } 538 } // else if !bStart do nothing 539 540 } else { 541 542 if (bStart) { 543 if (fLexicalHandler != null) { 544 fLexicalHandler.startDTD(docTypeName, publicId, systemId); 545 } 546 } else { 547 if (fLexicalHandler != null) { 548 fLexicalHandler.endDTD(); 549 } 550 } 551 } 552 } 553 554 /** 555 * Serializes a Comment Node. 
556 * 557 * @param node The Comment Node to serialize 558 */ 559 protected void serializeComment(Comment node) throws SAXException { 560 // comments=true 561 if ((fFeatures & COMMENTS) != 0) { 562 String data = node.getData(); 563 564 // well-formed=true 565 if ((fFeatures & WELLFORMED) != 0) { 566 isCommentWellFormed(data); 567 } 568 569 if (fLexicalHandler != null) { 570 // apply the LSSerializer filter after the operations requested by the 571 // DOMConfiguration parameters have been applied 572 if (!applyFilter(node, NodeFilter.SHOW_COMMENT)) { 573 return; 574 } 575 576 fLexicalHandler.comment(data.toCharArray(), 0, data.length()); 577 } 578 } 579 } 580 581 /** 582 * Serializes an Element Node. 583 * 584 * @param node The Element Node to serialize 585 * @param bStart Invoked at the start or end of node. 586 */ 587 protected void serializeElement(Element node, boolean bStart) 588 throws SAXException { 589 if (bStart) { 590 fElementDepth++; 591 592 // We use the Xalan specific startElement and starPrefixMapping calls 593 // (and addAttribute and namespaceAfterStartElement) as opposed to 594 // SAX specific, for performance reasons as they reduce the overhead 595 // of creating an AttList object upfront. 
596 597 // well-formed=true 598 if ((fFeatures & WELLFORMED) != 0) { 599 isElementWellFormed(node); 600 } 601 602 // REVISIT: We apply the LSSerializer filter for elements before 603 // namesapce fixup 604 if (!applyFilter(node, NodeFilter.SHOW_ELEMENT)) { 605 return; 606 } 607 608 // namespaces=true, record and fixup namspaced element 609 if ((fFeatures & NAMESPACES) != 0) { 610 fNSBinder.pushContext(); 611 fLocalNSBinder.reset(); 612 613 recordLocalNSDecl(node); 614 fixupElementNS(node); 615 } 616 617 // Namespace normalization 618 fSerializer.startElement( 619 node.getNamespaceURI(), 620 node.getLocalName(), 621 node.getNodeName()); 622 623 serializeAttList(node); 624 625 } else { 626 fElementDepth--; 627 628 // apply the LSSerializer filter 629 if (!applyFilter(node, NodeFilter.SHOW_ELEMENT)) { 630 return; 631 } 632 633 this.fSerializer.endElement( 634 node.getNamespaceURI(), 635 node.getLocalName(), 636 node.getNodeName()); 637 // since endPrefixMapping was not used by SerializationHandler it was removed 638 // for performance reasons. 639 640 if ((fFeatures & NAMESPACES) != 0 ) { 641 fNSBinder.popContext(); 642 } 643 644 } 645 } 646 647 /** 648 * Serializes the Attr Nodes of an Element. 649 * 650 * @param node The OwnerElement whose Attr Nodes are to be serialized. 651 */ 652 protected void serializeAttList(Element node) throws SAXException { 653 NamedNodeMap atts = node.getAttributes(); 654 int nAttrs = atts.getLength(); 655 656 for (int i = 0; i < nAttrs; i++) { 657 Node attr = atts.item(i); 658 659 String localName = attr.getLocalName(); 660 String attrName = attr.getNodeName(); 661 String attrPrefix = attr.getPrefix() == null ? "" : attr.getPrefix(); 662 String attrValue = attr.getNodeValue(); 663 664 // Determine the Attr's type. 665 String type = null; 666 if (fIsLevel3DOM) { 667 type = ((Attr) attr).getSchemaTypeInfo().getTypeName(); 668 } 669 type = type == null ? 
"CDATA" : type; 670 671 String attrNS = attr.getNamespaceURI(); 672 if (attrNS !=null && attrNS.length() == 0) { 673 attrNS=null; 674 // we must remove prefix for this attribute 675 attrName=attr.getLocalName(); 676 } 677 678 boolean isSpecified = ((Attr) attr).getSpecified(); 679 boolean addAttr = true; 680 boolean applyFilter = false; 681 boolean xmlnsAttr = 682 attrName.equals("xmlns") || attrName.startsWith("xmlns:"); 683 684 // well-formed=true 685 if ((fFeatures & WELLFORMED) != 0) { 686 isAttributeWellFormed(attr); 687 } 688 689 //----------------------------------------------------------------- 690 // start Attribute namespace fixup 691 //----------------------------------------------------------------- 692 // namespaces=true, normalize all non-namespace attributes 693 // Step 3. Attribute 694 if ((fFeatures & NAMESPACES) != 0 && !xmlnsAttr) { 695 696 // If the Attr has a namespace URI 697 if (attrNS != null) { 698 attrPrefix = attrPrefix == null ? "" : attrPrefix; 699 700 String declAttrPrefix = fNSBinder.getPrefix(attrNS); 701 String declAttrNS = fNSBinder.getURI(attrPrefix); 702 703 // attribute has no prefix (default namespace decl does not apply to 704 // attributes) 705 // OR 706 // attribute prefix is not declared 707 // OR 708 // conflict: attribute has a prefix that conflicts with a binding 709 if ("".equals(attrPrefix) || "".equals(declAttrPrefix) 710 || !attrPrefix.equals(declAttrPrefix)) { 711 712 // namespaceURI matches an in scope declaration of one or 713 // more prefixes 714 if (declAttrPrefix != null && !"".equals(declAttrPrefix)) { 715 // pick the prefix that was found and change attribute's 716 // prefix and nodeName. 
717 attrPrefix = declAttrPrefix; 718 719 if (declAttrPrefix.length() > 0 ) { 720 attrName = declAttrPrefix + ":" + localName; 721 } else { 722 attrName = localName; 723 } 724 } else { 725 // The current prefix is not null and it has no in scope 726 // declaration 727 if (attrPrefix != null && !"".equals(attrPrefix) 728 && declAttrNS == null) { 729 // declare this prefix 730 if ((fFeatures & NAMESPACEDECLS) != 0) { 731 fSerializer.addAttribute(XMLNS_URI, attrPrefix, 732 XMLNS_PREFIX + ":" + attrPrefix, "CDATA", 733 attrNS); 734 fNSBinder.declarePrefix(attrPrefix, attrNS); 735 fLocalNSBinder.declarePrefix(attrPrefix, attrNS); 736 } 737 } else { 738 // find a prefix following the pattern "NS" +index 739 // (starting at 1) 740 // make sure this prefix is not declared in the current 741 // scope. 742 int counter = 1; 743 attrPrefix = "NS" + counter++; 744 745 while (fLocalNSBinder.getURI(attrPrefix) != null) { 746 attrPrefix = "NS" + counter++; 747 } 748 // change attribute's prefix and Name 749 attrName = attrPrefix + ":" + localName; 750 751 // create a local namespace declaration attribute 752 // Add the xmlns declaration attribute 753 if ((fFeatures & NAMESPACEDECLS) != 0) { 754 755 fSerializer.addAttribute(XMLNS_URI, attrPrefix, 756 XMLNS_PREFIX + ":" + attrPrefix, "CDATA", 757 attrNS); 758 fNSBinder.declarePrefix(attrPrefix, attrNS); 759 fLocalNSBinder.declarePrefix(attrPrefix, attrNS); 760 } 761 } 762 } 763 } 764 765 } else { // if the Attr has no namespace URI 766 // Attr has no localName 767 if (localName == null) { 768 // DOM Level 1 node! 
769 String msg = Utils.messages.createMessage( 770 MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, 771 new Object[] { attrName }); 772 773 if (fErrorHandler != null) { 774 fErrorHandler 775 .handleError(new DOMErrorImpl( 776 DOMError.SEVERITY_ERROR, msg, 777 MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, null, 778 null, null)); 779 } 780 781 } else { // uri=null and no colon 782 // attr has no namespace URI and no prefix 783 // no action is required, since attrs don't use default 784 } 785 } 786 787 } 788 789 790 // discard-default-content=true 791 // Default attr's are not passed to the filter and this contraint 792 // is applied only when discard-default-content=true 793 // What about default xmlns attributes???? check for xmlnsAttr 794 if ((((fFeatures & DISCARDDEFAULT) != 0) && isSpecified) 795 || ((fFeatures & DISCARDDEFAULT) == 0)) { 796 applyFilter = true; 797 } else { 798 addAttr = false; 799 } 800 801 if (applyFilter) { 802 // apply the filter for Attributes that are not default attributes 803 // or namespace decl attributes 804 if (fFilter != null 805 && (fFilter.getWhatToShow() & NodeFilter.SHOW_ATTRIBUTE) 806 != 0) { 807 808 if (!xmlnsAttr) { 809 short code = fFilter.acceptNode(attr); 810 switch (code) { 811 case NodeFilter.FILTER_REJECT : 812 case NodeFilter.FILTER_SKIP : 813 addAttr = false; 814 break; 815 default : //fall through.. 816 } 817 } 818 } 819 } 820 821 // if the node is a namespace node 822 if (addAttr && xmlnsAttr) { 823 // If namespace-declarations=true, add the node , else don't add it 824 if ((fFeatures & NAMESPACEDECLS) != 0) { 825 // The namespace may have been fixed up, in that case don't add it. 
826 if (localName != null && !"".equals(localName)) { 827 fSerializer.addAttribute(attrNS, localName, attrName, type, attrValue); 828 } 829 } 830 } else if ( 831 addAttr && !xmlnsAttr) { // if the node is not a namespace node 832 // If namespace-declarations=true, add the node with the Attr nodes namespaceURI 833 // else add the node setting it's namespace to null or else the serializer will later 834 // attempt to add a xmlns attr for the prefixed attribute 835 if (((fFeatures & NAMESPACEDECLS) != 0) && (attrNS != null)) { 836 fSerializer.addAttribute( 837 attrNS, 838 localName, 839 attrName, 840 type, 841 attrValue); 842 } else { 843 fSerializer.addAttribute( 844 "", 845 localName, 846 attrName, 847 type, 848 attrValue); 849 } 850 } 851 852 // 853 if (xmlnsAttr && ((fFeatures & NAMESPACEDECLS) != 0)) { 854 int index; 855 // Use "" instead of null, as Xerces likes "" for the 856 // name of the default namespace. Fix attributed 857 // to "Steven Murray" <smurray@ebt.com>. 858 String prefix = 859 (index = attrName.indexOf(":")) < 0 860 ? "" 861 : attrName.substring(index + 1); 862 863 if (!"".equals(prefix)) { 864 fSerializer.namespaceAfterStartElement(prefix, attrValue); 865 } 866 } 867 } 868 869 } 870 871 /** 872 * Serializes an ProcessingInstruction Node. 873 * 874 * @param node The ProcessingInstruction Node to serialize 875 */ 876 protected void serializePI(ProcessingInstruction node) 877 throws SAXException { 878 ProcessingInstruction pi = node; 879 String name = pi.getNodeName(); 880 881 // well-formed=true 882 if ((fFeatures & WELLFORMED) != 0) { 883 isPIWellFormed(node); 884 } 885 886 // apply the LSSerializer filter 887 if (!applyFilter(node, NodeFilter.SHOW_PROCESSING_INSTRUCTION)) { 888 return; 889 } 890 891 // String data = pi.getData(); 892 if (name.equals("xslt-next-is-raw")) { 893 fNextIsRaw = true; 894 } else { 895 this.fSerializer.processingInstruction(name, pi.getData()); 896 } 897 } 898 899 /** 900 * Serializes an CDATASection Node. 
901 * 902 * @param node The CDATASection Node to serialize 903 */ 904 protected void serializeCDATASection(CDATASection node) 905 throws SAXException { 906 // well-formed=true 907 if ((fFeatures & WELLFORMED) != 0) { 908 isCDATASectionWellFormed(node); 909 } 910 911 // cdata-sections = true 912 if ((fFeatures & CDATA) != 0) { 913 914 // split-cdata-sections = true 915 // Assumption: This parameter has an effect only when 916 // cdata-sections=true 917 // ToStream, by default splits cdata-sections. Hence the check 918 // below. 919 String nodeValue = node.getNodeValue(); 920 int endIndex = nodeValue.indexOf("]]>"); 921 if ((fFeatures & SPLITCDATA) != 0) { 922 if (endIndex >= 0) { 923 // The first node split will contain the ]] markers 924 String relatedData = nodeValue.substring(0, endIndex + 2); 925 926 String msg = 927 Utils.messages.createMessage( 928 MsgKey.ER_CDATA_SECTIONS_SPLIT, 929 null); 930 931 if (fErrorHandler != null) { 932 fErrorHandler.handleError( 933 new DOMErrorImpl( 934 DOMError.SEVERITY_WARNING, 935 msg, 936 MsgKey.ER_CDATA_SECTIONS_SPLIT, 937 null, 938 relatedData, 939 null)); 940 } 941 } 942 } else { 943 if (endIndex >= 0) { 944 // The first node split will contain the ]] markers 945 String relatedData = nodeValue.substring(0, endIndex + 2); 946 947 String msg = 948 Utils.messages.createMessage( 949 MsgKey.ER_CDATA_SECTIONS_SPLIT, 950 null); 951 952 if (fErrorHandler != null) { 953 fErrorHandler.handleError( 954 new DOMErrorImpl( 955 DOMError.SEVERITY_ERROR, 956 msg, 957 MsgKey.ER_CDATA_SECTIONS_SPLIT)); 958 } 959 // Report an error and return. What error??? 
960 return; 961 } 962 } 963 964 // apply the LSSerializer filter 965 if (!applyFilter(node, NodeFilter.SHOW_CDATA_SECTION)) { 966 return; 967 } 968 969 // splits the cdata-section 970 if (fLexicalHandler != null) { 971 fLexicalHandler.startCDATA(); 972 } 973 dispatachChars(node); 974 if (fLexicalHandler != null) { 975 fLexicalHandler.endCDATA(); 976 } 977 } else { 978 dispatachChars(node); 979 } 980 } 981 982 /** 983 * Serializes an Text Node. 984 * 985 * @param node The Text Node to serialize 986 */ 987 protected void serializeText(Text node) throws SAXException { 988 if (fNextIsRaw) { 989 fNextIsRaw = false; 990 fSerializer.processingInstruction( 991 javax.xml.transform.Result.PI_DISABLE_OUTPUT_ESCAPING, 992 ""); 993 dispatachChars(node); 994 fSerializer.processingInstruction( 995 javax.xml.transform.Result.PI_ENABLE_OUTPUT_ESCAPING, 996 ""); 997 } else { 998 // keep track of dispatch or not to avoid duplicaiton of filter code 999 boolean bDispatch = false; 1000 1001 // well-formed=true 1002 if ((fFeatures & WELLFORMED) != 0) { 1003 isTextWellFormed(node); 1004 } 1005 1006 // if the node is whitespace 1007 // Determine the Attr's type. 1008 boolean isElementContentWhitespace = false; 1009 if (fIsLevel3DOM) { 1010 isElementContentWhitespace = 1011 node.isElementContentWhitespace(); 1012 } 1013 1014 if (isElementContentWhitespace) { 1015 // element-content-whitespace=true 1016 if ((fFeatures & ELEM_CONTENT_WHITESPACE) != 0) { 1017 bDispatch = true; 1018 } 1019 } else { 1020 bDispatch = true; 1021 } 1022 1023 // apply the LSSerializer filter 1024 if (!applyFilter(node, NodeFilter.SHOW_TEXT)) { 1025 return; 1026 } 1027 1028 if (bDispatch 1029 && (!fSerializer.getIndent() || !node.getData().replace('\n', ' ').trim().isEmpty())) { 1030 dispatachChars(node); 1031 } 1032 } 1033 } 1034 1035 /** 1036 * Serializes an EntityReference Node. 
1037 * 1038 * @param node The EntityReference Node to serialize 1039 * @param bStart Inicates if called from start or endNode 1040 */ 1041 protected void serializeEntityReference( 1042 EntityReference node, 1043 boolean bStart) 1044 throws SAXException { 1045 if (bStart) { 1046 EntityReference eref = node; 1047 // entities=true 1048 if ((fFeatures & ENTITIES) != 0) { 1049 1050 // perform well-formedness and other checking only if 1051 // entities = true 1052 1053 // well-formed=true 1054 if ((fFeatures & WELLFORMED) != 0) { 1055 isEntityReferneceWellFormed(node); 1056 } 1057 1058 // check "unbound-prefix-in-entity-reference" [fatal] 1059 // Raised if the configuration parameter "namespaces" is set to true 1060 if ((fFeatures & NAMESPACES) != 0) { 1061 checkUnboundPrefixInEntRef(node); 1062 } 1063 1064 // The filter should not apply in this case, since the 1065 // EntityReference is not being expanded. 1066 // should we pass entity reference nodes to the filter??? 1067 } 1068 1069 // if "entities" is true, or EntityReference node has no children, 1070 // it will be serialized as the form "&entityName;" in the output. 1071 if (fLexicalHandler != null && ((fFeatures & ENTITIES) != 0 || !node.hasChildNodes())) { 1072 1073 // startEntity outputs only Text but not Element, Attr, Comment 1074 // and PI child nodes. It does so by setting the m_inEntityRef 1075 // in ToStream and using this to decide if a node is to be 1076 // serialized or not. 
1077 fLexicalHandler.startEntity(eref.getNodeName()); 1078 } 1079 1080 } else { 1081 EntityReference eref = node; 1082 // entities=true or false, 1083 if (fLexicalHandler != null) { 1084 fLexicalHandler.endEntity(eref.getNodeName()); 1085 } 1086 } 1087 } 1088 1089 1090 // *********************************************************************** 1091 // Methods to check well-formedness 1092 // *********************************************************************** 1093 /** 1094 * Taken from org.apache.xerces.dom.CoreDocumentImpl 1095 * 1096 * Check the string against XML's definition of acceptable names for 1097 * elements and attributes and so on using the XMLCharacterProperties 1098 * utility class 1099 */ 1100 protected boolean isXMLName(String s, boolean xml11Version) { 1101 1102 if (s == null) { 1103 return false; 1104 } 1105 if (!xml11Version) 1106 return XMLChar.isValidName(s); 1107 else 1108 return XML11Char.isXML11ValidName(s); 1109 } 1110 1111 /** 1112 * Taken from org.apache.xerces.dom.CoreDocumentImpl 1113 * 1114 * Checks if the given qualified name is legal with respect 1115 * to the version of XML to which this document must conform. 
1116 * 1117 * @param prefix prefix of qualified name 1118 * @param local local part of qualified name 1119 */ 1120 protected boolean isValidQName( 1121 String prefix, 1122 String local, 1123 boolean xml11Version) { 1124 1125 // check that both prefix and local part match NCName 1126 if (local == null) 1127 return false; 1128 boolean validNCName = false; 1129 1130 if (!xml11Version) { 1131 validNCName = 1132 (prefix == null || XMLChar.isValidNCName(prefix)) 1133 && XMLChar.isValidNCName(local); 1134 } else { 1135 validNCName = 1136 (prefix == null || XML11Char.isXML11ValidNCName(prefix)) 1137 && XML11Char.isXML11ValidNCName(local); 1138 } 1139 1140 return validNCName; 1141 } 1142 1143 /** 1144 * Checks if a XML character is well-formed 1145 * 1146 * @param characters A String of characters to be checked for Well-Formedness 1147 * @param refInvalidChar A reference to the character to be returned that was determined invalid. 1148 */ 1149 protected boolean isWFXMLChar(String chardata, Character refInvalidChar) { 1150 if (chardata == null || (chardata.length() == 0)) { 1151 return true; 1152 } 1153 1154 char[] dataarray = chardata.toCharArray(); 1155 int datalength = dataarray.length; 1156 1157 // version of the document is XML 1.1 1158 if (fIsXMLVersion11) { 1159 //we need to check all characters as per production rules of XML11 1160 int i = 0; 1161 while (i < datalength) { 1162 if (XML11Char.isXML11Invalid(dataarray[i++])) { 1163 // check if this is a supplemental character 1164 char ch = dataarray[i - 1]; 1165 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1166 char ch2 = dataarray[i++]; 1167 if (XMLChar.isLowSurrogate(ch2) 1168 && XMLChar.isSupplemental( 1169 XMLChar.supplemental(ch, ch2))) { 1170 continue; 1171 } 1172 } 1173 // Reference to invalid character which is returned 1174 refInvalidChar = ch; 1175 return false; 1176 } 1177 } 1178 } // version of the document is XML 1.0 1179 else { 1180 // we need to check all characters as per production rules of XML 
1.0 1181 int i = 0; 1182 while (i < datalength) { 1183 if (XMLChar.isInvalid(dataarray[i++])) { 1184 // check if this is a supplemental character 1185 char ch = dataarray[i - 1]; 1186 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1187 char ch2 = dataarray[i++]; 1188 if (XMLChar.isLowSurrogate(ch2) 1189 && XMLChar.isSupplemental( 1190 XMLChar.supplemental(ch, ch2))) { 1191 continue; 1192 } 1193 } 1194 // Reference to invalid character which is returned 1195 refInvalidChar = ch; 1196 return false; 1197 } 1198 } 1199 } // end-else fDocument.isXMLVersion() 1200 1201 return true; 1202 } // isXMLCharWF 1203 1204 /** 1205 * Checks if a XML character is well-formed. If there is a problem with 1206 * the character a non-null Character is returned else null is returned. 1207 * 1208 * @param characters A String of characters to be checked for Well-Formedness 1209 * @return Character A reference to the character to be returned that was determined invalid. 1210 */ 1211 protected Character isWFXMLChar(String chardata) { 1212 Character refInvalidChar; 1213 if (chardata == null || (chardata.length() == 0)) { 1214 return null; 1215 } 1216 1217 char[] dataarray = chardata.toCharArray(); 1218 int datalength = dataarray.length; 1219 1220 // version of the document is XML 1.1 1221 if (fIsXMLVersion11) { 1222 //we need to check all characters as per production rules of XML11 1223 int i = 0; 1224 while (i < datalength) { 1225 if (XML11Char.isXML11Invalid(dataarray[i++])) { 1226 // check if this is a supplemental character 1227 char ch = dataarray[i - 1]; 1228 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1229 char ch2 = dataarray[i++]; 1230 if (XMLChar.isLowSurrogate(ch2) 1231 && XMLChar.isSupplemental( 1232 XMLChar.supplemental(ch, ch2))) { 1233 continue; 1234 } 1235 } 1236 // Reference to invalid character which is returned 1237 refInvalidChar = ch; 1238 return refInvalidChar; 1239 } 1240 } 1241 } // version of the document is XML 1.0 1242 else { 1243 // we need to check 
all characters as per production rules of XML 1.0 1244 int i = 0; 1245 while (i < datalength) { 1246 if (XMLChar.isInvalid(dataarray[i++])) { 1247 // check if this is a supplemental character 1248 char ch = dataarray[i - 1]; 1249 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1250 char ch2 = dataarray[i++]; 1251 if (XMLChar.isLowSurrogate(ch2) 1252 && XMLChar.isSupplemental( 1253 XMLChar.supplemental(ch, ch2))) { 1254 continue; 1255 } 1256 } 1257 // Reference to invalid character which is returned 1258 refInvalidChar = ch; 1259 return refInvalidChar; 1260 } 1261 } 1262 } // end-else fDocument.isXMLVersion() 1263 1264 return null; 1265 } // isXMLCharWF 1266 1267 /** 1268 * Checks if a comment node is well-formed 1269 * 1270 * @param data The contents of the comment node 1271 * @return a boolean indiacating if the comment is well-formed or not. 1272 */ 1273 protected void isCommentWellFormed(String data) { 1274 if (data == null || (data.length() == 0)) { 1275 return; 1276 } 1277 1278 char[] dataarray = data.toCharArray(); 1279 int datalength = dataarray.length; 1280 1281 // version of the document is XML 1.1 1282 if (fIsXMLVersion11) { 1283 // we need to check all chracters as per production rules of XML11 1284 int i = 0; 1285 while (i < datalength) { 1286 char c = dataarray[i++]; 1287 if (XML11Char.isXML11Invalid(c)) { 1288 // check if this is a supplemental character 1289 if (XMLChar.isHighSurrogate(c) && i < datalength) { 1290 char c2 = dataarray[i++]; 1291 if (XMLChar.isLowSurrogate(c2) 1292 && XMLChar.isSupplemental( 1293 XMLChar.supplemental(c, c2))) { 1294 continue; 1295 } 1296 } 1297 String msg = 1298 Utils.messages.createMessage( 1299 MsgKey.ER_WF_INVALID_CHARACTER_IN_COMMENT, 1300 new Object[] { c}); 1301 1302 if (fErrorHandler != null) { 1303 fErrorHandler.handleError( 1304 new DOMErrorImpl( 1305 DOMError.SEVERITY_FATAL_ERROR, 1306 msg, 1307 MsgKey.ER_WF_INVALID_CHARACTER, 1308 null, 1309 null, 1310 null)); 1311 } 1312 } else if (c == '-' && i < 
datalength && dataarray[i] == '-') { 1313 String msg = 1314 Utils.messages.createMessage( 1315 MsgKey.ER_WF_DASH_IN_COMMENT, 1316 null); 1317 1318 if (fErrorHandler != null) { 1319 fErrorHandler.handleError( 1320 new DOMErrorImpl( 1321 DOMError.SEVERITY_FATAL_ERROR, 1322 msg, 1323 MsgKey.ER_WF_INVALID_CHARACTER, 1324 null, 1325 null, 1326 null)); 1327 } 1328 } 1329 } 1330 } // version of the document is XML 1.0 1331 else { 1332 // we need to check all chracters as per production rules of XML 1.0 1333 int i = 0; 1334 while (i < datalength) { 1335 char c = dataarray[i++]; 1336 if (XMLChar.isInvalid(c)) { 1337 // check if this is a supplemental character 1338 if (XMLChar.isHighSurrogate(c) && i < datalength) { 1339 char c2 = dataarray[i++]; 1340 if (XMLChar.isLowSurrogate(c2) 1341 && XMLChar.isSupplemental( 1342 XMLChar.supplemental(c, c2))) { 1343 continue; 1344 } 1345 } 1346 String msg = 1347 Utils.messages.createMessage( 1348 MsgKey.ER_WF_INVALID_CHARACTER_IN_COMMENT, 1349 new Object[] { c}); 1350 1351 if (fErrorHandler != null) { 1352 fErrorHandler.handleError( 1353 new DOMErrorImpl( 1354 DOMError.SEVERITY_FATAL_ERROR, 1355 msg, 1356 MsgKey.ER_WF_INVALID_CHARACTER, 1357 null, 1358 null, 1359 null)); 1360 } 1361 } else if (c == '-' && i < datalength && dataarray[i] == '-') { 1362 String msg = 1363 Utils.messages.createMessage( 1364 MsgKey.ER_WF_DASH_IN_COMMENT, 1365 null); 1366 1367 if (fErrorHandler != null) { 1368 fErrorHandler.handleError( 1369 new DOMErrorImpl( 1370 DOMError.SEVERITY_FATAL_ERROR, 1371 msg, 1372 MsgKey.ER_WF_INVALID_CHARACTER, 1373 null, 1374 null, 1375 null)); 1376 } 1377 } 1378 } 1379 } 1380 return; 1381 } 1382 1383 /** 1384 * Checks if an element node is well-formed, by checking its Name for well-formedness. 1385 * 1386 * @param data The contents of the comment node 1387 * @return a boolean indiacating if the comment is well-formed or not. 
1388 */ 1389 protected void isElementWellFormed(Node node) { 1390 boolean isNameWF = false; 1391 if ((fFeatures & NAMESPACES) != 0) { 1392 isNameWF = 1393 isValidQName( 1394 node.getPrefix(), 1395 node.getLocalName(), 1396 fIsXMLVersion11); 1397 } else { 1398 isNameWF = isXMLName(node.getNodeName(), fIsXMLVersion11); 1399 } 1400 1401 if (!isNameWF) { 1402 String msg = 1403 Utils.messages.createMessage( 1404 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1405 new Object[] { "Element", node.getNodeName()}); 1406 1407 if (fErrorHandler != null) { 1408 fErrorHandler.handleError( 1409 new DOMErrorImpl( 1410 DOMError.SEVERITY_FATAL_ERROR, 1411 msg, 1412 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1413 null, 1414 null, 1415 null)); 1416 } 1417 } 1418 } 1419 1420 /** 1421 * Checks if an attr node is well-formed, by checking it's Name and value 1422 * for well-formedness. 1423 * 1424 * @param data The contents of the comment node 1425 * @return a boolean indiacating if the comment is well-formed or not. 
1426 */ 1427 protected void isAttributeWellFormed(Node node) { 1428 boolean isNameWF = false; 1429 if ((fFeatures & NAMESPACES) != 0) { 1430 isNameWF = 1431 isValidQName( 1432 node.getPrefix(), 1433 node.getLocalName(), 1434 fIsXMLVersion11); 1435 } else { 1436 isNameWF = isXMLName(node.getNodeName(), fIsXMLVersion11); 1437 } 1438 1439 if (!isNameWF) { 1440 String msg = 1441 Utils.messages.createMessage( 1442 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1443 new Object[] { "Attr", node.getNodeName()}); 1444 1445 if (fErrorHandler != null) { 1446 fErrorHandler.handleError( 1447 new DOMErrorImpl( 1448 DOMError.SEVERITY_FATAL_ERROR, 1449 msg, 1450 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1451 null, 1452 null, 1453 null)); 1454 } 1455 } 1456 1457 // Check the Attr's node value 1458 // WFC: No < in Attribute Values 1459 String value = node.getNodeValue(); 1460 if (value.indexOf('<') >= 0) { 1461 String msg = 1462 Utils.messages.createMessage( 1463 MsgKey.ER_WF_LT_IN_ATTVAL, 1464 new Object[] { 1465 ((Attr) node).getOwnerElement().getNodeName(), 1466 node.getNodeName()}); 1467 1468 if (fErrorHandler != null) { 1469 fErrorHandler.handleError( 1470 new DOMErrorImpl( 1471 DOMError.SEVERITY_FATAL_ERROR, 1472 msg, 1473 MsgKey.ER_WF_LT_IN_ATTVAL, 1474 null, 1475 null, 1476 null)); 1477 } 1478 } 1479 1480 // we need to loop through the children of attr nodes and check their values for 1481 // well-formedness 1482 NodeList children = node.getChildNodes(); 1483 for (int i = 0; i < children.getLength(); i++) { 1484 Node child = children.item(i); 1485 // An attribute node with no text or entity ref child for example 1486 // doc.createAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:ns"); 1487 // followes by 1488 // element.setAttributeNodeNS(attribute); 1489 // can potentially lead to this situation. If the attribute 1490 // was a prefix Namespace attribute declaration then then DOM Core 1491 // should have some exception defined for this. 
1492 if (child == null) { 1493 // we should probably report an error 1494 continue; 1495 } 1496 switch (child.getNodeType()) { 1497 case Node.TEXT_NODE : 1498 isTextWellFormed((Text) child); 1499 break; 1500 case Node.ENTITY_REFERENCE_NODE : 1501 isEntityReferneceWellFormed((EntityReference) child); 1502 break; 1503 default : 1504 } 1505 } 1506 1507 // TODO: 1508 // WFC: Check if the attribute prefix is bound to 1509 // http://www.w3.org/2000/xmlns/ 1510 1511 // WFC: Unique Att Spec 1512 // Perhaps pass a seen boolean value to this method. serializeAttList will determine 1513 // if the attr was seen before. 1514 } 1515 1516 /** 1517 * Checks if a PI node is well-formed, by checking it's Name and data 1518 * for well-formedness. 1519 * 1520 * @param data The contents of the comment node 1521 */ 1522 protected void isPIWellFormed(ProcessingInstruction node) { 1523 // Is the PI Target a valid XML name 1524 if (!isXMLName(node.getNodeName(), fIsXMLVersion11)) { 1525 String msg = 1526 Utils.messages.createMessage( 1527 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1528 new Object[] { "ProcessingInstruction", node.getTarget()}); 1529 1530 if (fErrorHandler != null) { 1531 fErrorHandler.handleError( 1532 new DOMErrorImpl( 1533 DOMError.SEVERITY_FATAL_ERROR, 1534 msg, 1535 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1536 null, 1537 null, 1538 null)); 1539 } 1540 } 1541 1542 // Does the PI Data carry valid XML characters 1543 1544 // REVISIT: Should we check if the PI DATA contains a ?> ??? 
1545 Character invalidChar = isWFXMLChar(node.getData()); 1546 if (invalidChar != null) { 1547 String msg = 1548 Utils.messages.createMessage( 1549 MsgKey.ER_WF_INVALID_CHARACTER_IN_PI, 1550 new Object[] { Integer.toHexString(Character.getNumericValue(invalidChar.charValue())) }); 1551 1552 if (fErrorHandler != null) { 1553 fErrorHandler.handleError( 1554 new DOMErrorImpl( 1555 DOMError.SEVERITY_FATAL_ERROR, 1556 msg, 1557 MsgKey.ER_WF_INVALID_CHARACTER, 1558 null, 1559 null, 1560 null)); 1561 } 1562 } 1563 } 1564 1565 /** 1566 * Checks if an CDATASection node is well-formed, by checking it's data 1567 * for well-formedness. Note that the presence of a CDATA termination mark 1568 * in the contents of a CDATASection is handled by the parameter 1569 * spli-cdata-sections 1570 * 1571 * @param data The contents of the comment node 1572 */ 1573 protected void isCDATASectionWellFormed(CDATASection node) { 1574 // Does the data valid XML character data 1575 Character invalidChar = isWFXMLChar(node.getData()); 1576 //if (!isWFXMLChar(node.getData(), invalidChar)) { 1577 if (invalidChar != null) { 1578 String msg = 1579 Utils.messages.createMessage( 1580 MsgKey.ER_WF_INVALID_CHARACTER_IN_CDATA, 1581 new Object[] { Integer.toHexString(Character.getNumericValue(invalidChar.charValue())) }); 1582 1583 if (fErrorHandler != null) { 1584 fErrorHandler.handleError( 1585 new DOMErrorImpl( 1586 DOMError.SEVERITY_FATAL_ERROR, 1587 msg, 1588 MsgKey.ER_WF_INVALID_CHARACTER, 1589 null, 1590 null, 1591 null)); 1592 } 1593 } 1594 } 1595 1596 /** 1597 * Checks if an Text node is well-formed, by checking if it contains invalid 1598 * XML characters. 
1599 * 1600 * @param data The contents of the comment node 1601 */ 1602 protected void isTextWellFormed(Text node) { 1603 // Does the data valid XML character data 1604 Character invalidChar = isWFXMLChar(node.getData()); 1605 if (invalidChar != null) { 1606 String msg = 1607 Utils.messages.createMessage( 1608 MsgKey.ER_WF_INVALID_CHARACTER_IN_TEXT, 1609 new Object[] { Integer.toHexString(Character.getNumericValue(invalidChar.charValue())) }); 1610 1611 if (fErrorHandler != null) { 1612 fErrorHandler.handleError( 1613 new DOMErrorImpl( 1614 DOMError.SEVERITY_FATAL_ERROR, 1615 msg, 1616 MsgKey.ER_WF_INVALID_CHARACTER, 1617 null, 1618 null, 1619 null)); 1620 } 1621 } 1622 } 1623 1624 /** 1625 * Checks if an EntityRefernece node is well-formed, by checking it's node name. Then depending 1626 * on whether it is referenced in Element content or in an Attr Node, checks if the EntityReference 1627 * references an unparsed entity or a external entity and if so throws raises the 1628 * appropriate well-formedness error. 1629 * 1630 * @param data The contents of the comment node 1631 * @parent The parent of the EntityReference Node 1632 */ 1633 protected void isEntityReferneceWellFormed(EntityReference node) { 1634 // Is the EntityReference name a valid XML name 1635 if (!isXMLName(node.getNodeName(), fIsXMLVersion11)) { 1636 String msg = 1637 Utils.messages.createMessage( 1638 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1639 new Object[] { "EntityReference", node.getNodeName()}); 1640 1641 if (fErrorHandler != null) { 1642 fErrorHandler.handleError( 1643 new DOMErrorImpl( 1644 DOMError.SEVERITY_FATAL_ERROR, 1645 msg, 1646 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1647 null, 1648 null, 1649 null)); 1650 } 1651 } 1652 1653 // determine the parent node 1654 Node parent = node.getParentNode(); 1655 1656 // Traverse the declared entities and check if the nodeName and namespaceURI 1657 // of the EntityReference matches an Entity. 
If so, check the if the notationName 1658 // is not null, if so, report an error. 1659 DocumentType docType = node.getOwnerDocument().getDoctype(); 1660 if (docType != null) { 1661 NamedNodeMap entities = docType.getEntities(); 1662 for (int i = 0; i < entities.getLength(); i++) { 1663 Entity ent = (Entity) entities.item(i); 1664 1665 String nodeName = 1666 node.getNodeName() == null ? "" : node.getNodeName(); 1667 String nodeNamespaceURI = 1668 node.getNamespaceURI() == null 1669 ? "" 1670 : node.getNamespaceURI(); 1671 String entName = 1672 ent.getNodeName() == null ? "" : ent.getNodeName(); 1673 String entNamespaceURI = 1674 ent.getNamespaceURI() == null ? "" : ent.getNamespaceURI(); 1675 // If referenced in Element content 1676 // WFC: Parsed Entity 1677 if (parent.getNodeType() == Node.ELEMENT_NODE) { 1678 if (entNamespaceURI.equals(nodeNamespaceURI) 1679 && entName.equals(nodeName)) { 1680 1681 if (ent.getNotationName() != null) { 1682 String msg = 1683 Utils.messages.createMessage( 1684 MsgKey.ER_WF_REF_TO_UNPARSED_ENT, 1685 new Object[] { node.getNodeName()}); 1686 1687 if (fErrorHandler != null) { 1688 fErrorHandler.handleError( 1689 new DOMErrorImpl( 1690 DOMError.SEVERITY_FATAL_ERROR, 1691 msg, 1692 MsgKey.ER_WF_REF_TO_UNPARSED_ENT, 1693 null, 1694 null, 1695 null)); 1696 } 1697 } 1698 } 1699 } // end if WFC: Parsed Entity 1700 1701 // If referenced in an Attr value 1702 // WFC: No External Entity References 1703 if (parent.getNodeType() == Node.ATTRIBUTE_NODE) { 1704 if (entNamespaceURI.equals(nodeNamespaceURI) 1705 && entName.equals(nodeName)) { 1706 1707 if (ent.getPublicId() != null 1708 || ent.getSystemId() != null 1709 || ent.getNotationName() != null) { 1710 String msg = 1711 Utils.messages.createMessage( 1712 MsgKey.ER_WF_REF_TO_EXTERNAL_ENT, 1713 new Object[] { node.getNodeName()}); 1714 1715 if (fErrorHandler != null) { 1716 fErrorHandler.handleError( 1717 new DOMErrorImpl( 1718 DOMError.SEVERITY_FATAL_ERROR, 1719 msg, 1720 
MsgKey.ER_WF_REF_TO_EXTERNAL_ENT, 1721 null, 1722 null, 1723 null)); 1724 } 1725 } 1726 } 1727 } //end if WFC: No External Entity References 1728 } 1729 } 1730 } // isEntityReferneceWellFormed 1731 1732 /** 1733 * If the configuration parameter "namespaces" is set to true, this methods 1734 * checks if an entity whose replacement text contains unbound namespace 1735 * prefixes is referenced in a location where there are no bindings for 1736 * the namespace prefixes and if so raises a LSException with the error-type 1737 * "unbound-prefix-in-entity-reference" 1738 * 1739 * @param Node, The EntityReference nodes whose children are to be checked 1740 */ 1741 protected void checkUnboundPrefixInEntRef(Node node) { 1742 Node child, next; 1743 for (child = node.getFirstChild(); child != null; child = next) { 1744 next = child.getNextSibling(); 1745 1746 if (child.getNodeType() == Node.ELEMENT_NODE) { 1747 1748 //If a NamespaceURI is not declared for the current 1749 //node's prefix, raise a fatal error. 
1750 String prefix = child.getPrefix(); 1751 if (prefix != null 1752 && fNSBinder.getURI(prefix) == null) { 1753 String msg = 1754 Utils.messages.createMessage( 1755 MsgKey.ER_ELEM_UNBOUND_PREFIX_IN_ENTREF, 1756 new Object[] { 1757 node.getNodeName(), 1758 child.getNodeName(), 1759 prefix }); 1760 1761 if (fErrorHandler != null) { 1762 fErrorHandler.handleError( 1763 new DOMErrorImpl( 1764 DOMError.SEVERITY_FATAL_ERROR, 1765 msg, 1766 MsgKey.ER_ELEM_UNBOUND_PREFIX_IN_ENTREF, 1767 null, 1768 null, 1769 null)); 1770 } 1771 } 1772 1773 NamedNodeMap attrs = child.getAttributes(); 1774 1775 for (int i = 0; i < attrs.getLength(); i++) { 1776 String attrPrefix = attrs.item(i).getPrefix(); 1777 if (attrPrefix != null 1778 && fNSBinder.getURI(attrPrefix) == null) { 1779 String msg = 1780 Utils.messages.createMessage( 1781 MsgKey.ER_ATTR_UNBOUND_PREFIX_IN_ENTREF, 1782 new Object[] { 1783 node.getNodeName(), 1784 child.getNodeName(), 1785 attrs.item(i)}); 1786 1787 if (fErrorHandler != null) { 1788 fErrorHandler.handleError( 1789 new DOMErrorImpl( 1790 DOMError.SEVERITY_FATAL_ERROR, 1791 msg, 1792 MsgKey.ER_ATTR_UNBOUND_PREFIX_IN_ENTREF, 1793 null, 1794 null, 1795 null)); 1796 } 1797 } 1798 } 1799 } 1800 1801 if (child.hasChildNodes()) { 1802 checkUnboundPrefixInEntRef(child); 1803 } 1804 } 1805 } 1806 1807 // *********************************************************************** 1808 // Namespace normalization 1809 // *********************************************************************** 1810 /** 1811 * Records local namespace declarations, to be used for normalization later 1812 * 1813 * @param Node, The element node, whose namespace declarations are to be recorded 1814 */ 1815 protected void recordLocalNSDecl(Node node) { 1816 NamedNodeMap atts = ((Element) node).getAttributes(); 1817 int length = atts.getLength(); 1818 1819 for (int i = 0; i < length; i++) { 1820 Node attr = atts.item(i); 1821 1822 String localName = attr.getLocalName(); 1823 String attrPrefix = 
attr.getPrefix(); 1824 String attrValue = attr.getNodeValue(); 1825 String attrNS = attr.getNamespaceURI(); 1826 1827 localName = 1828 localName == null 1829 || XMLNS_PREFIX.equals(localName) ? "" : localName; 1830 attrPrefix = attrPrefix == null ? "" : attrPrefix; 1831 attrValue = attrValue == null ? "" : attrValue; 1832 attrNS = attrNS == null ? "" : attrNS; 1833 1834 // check if attribute is a namespace decl 1835 if (XMLNS_URI.equals(attrNS)) { 1836 1837 // No prefix may be bound to http://www.w3.org/2000/xmlns/. 1838 if (XMLNS_URI.equals(attrValue)) { 1839 String msg = 1840 Utils.messages.createMessage( 1841 MsgKey.ER_NS_PREFIX_CANNOT_BE_BOUND, 1842 new Object[] { attrPrefix, XMLNS_URI }); 1843 1844 if (fErrorHandler != null) { 1845 fErrorHandler.handleError( 1846 new DOMErrorImpl( 1847 DOMError.SEVERITY_ERROR, 1848 msg, 1849 MsgKey.ER_NS_PREFIX_CANNOT_BE_BOUND, 1850 null, 1851 null, 1852 null)); 1853 } 1854 } else { 1855 // store the namespace-declaration 1856 if (XMLNS_PREFIX.equals(attrPrefix) ) { 1857 // record valid decl 1858 if (attrValue.length() != 0) { 1859 fNSBinder.declarePrefix(localName, attrValue); 1860 } else { 1861 // Error; xmlns:prefix="" 1862 } 1863 } else { // xmlns 1864 // empty prefix is always bound ("" or some string) 1865 fNSBinder.declarePrefix("", attrValue); 1866 } 1867 } 1868 1869 } 1870 } 1871 } 1872 1873 /** 1874 * Fixes an element's namespace 1875 * 1876 * @param Node, The element node, whose namespace is to be fixed 1877 */ 1878 protected void fixupElementNS(Node node) throws SAXException { 1879 String namespaceURI = ((Element) node).getNamespaceURI(); 1880 String prefix = ((Element) node).getPrefix(); 1881 String localName = ((Element) node).getLocalName(); 1882 1883 if (namespaceURI != null) { 1884 //if ( Element's prefix/namespace pair (or default namespace, 1885 // if no prefix) are within the scope of a binding ) 1886 prefix = prefix == null ? 
"" : prefix; 1887 String inScopeNamespaceURI = fNSBinder.getURI(prefix); 1888 1889 if ((inScopeNamespaceURI != null 1890 && inScopeNamespaceURI.equals(namespaceURI))) { 1891 // do nothing, declaration in scope is inherited 1892 1893 } else { 1894 // Create a local namespace declaration attr for this namespace, 1895 // with Element's current prefix (or a default namespace, if 1896 // no prefix). If there's a conflicting local declaration 1897 // already present, change its value to use this namespace. 1898 1899 // Add the xmlns declaration attribute 1900 //fNSBinder.pushNamespace(prefix, namespaceURI, fElementDepth); 1901 if ((fFeatures & NAMESPACEDECLS) != 0) { 1902 if ("".equals(prefix) || "".equals(namespaceURI)) { 1903 ((Element)node).setAttributeNS(XMLNS_URI, XMLNS_PREFIX, namespaceURI); 1904 } else { 1905 ((Element)node).setAttributeNS(XMLNS_URI, XMLNS_PREFIX + ":" + prefix, namespaceURI); 1906 } 1907 } 1908 fLocalNSBinder.declarePrefix(prefix, namespaceURI); 1909 fNSBinder.declarePrefix(prefix, namespaceURI); 1910 1911 } 1912 } else { 1913 // Element has no namespace 1914 // DOM Level 1 1915 if (localName == null || "".equals(localName)) { 1916 // DOM Level 1 node! 
1917 String msg = 1918 Utils.messages.createMessage( 1919 MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, 1920 new Object[] { node.getNodeName()}); 1921 1922 if (fErrorHandler != null) { 1923 fErrorHandler.handleError( 1924 new DOMErrorImpl( 1925 DOMError.SEVERITY_ERROR, 1926 msg, 1927 MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, 1928 null, 1929 null, 1930 null)); 1931 } 1932 } else { 1933 namespaceURI = fNSBinder.getURI(""); 1934 if (namespaceURI !=null && namespaceURI.length() > 0) { 1935 ((Element)node).setAttributeNS(XMLNS_URI, XMLNS_PREFIX, ""); 1936 fLocalNSBinder.declarePrefix("", ""); 1937 fNSBinder.declarePrefix("", ""); 1938 } 1939 } 1940 } 1941 } 1942 /** 1943 * This table is a quick lookup of a property key (String) to the integer that 1944 * is the bit to flip in the fFeatures field, so the integers should have 1945 * values 1,2,4,8,16... 1946 * 1947 */ 1948 private static final Map<String, Integer> fFeatureMap; 1949 static { 1950 1951 // Initialize the mappings of property keys to bit values (Integer objects) 1952 // or mappings to a String object "", which indicates we are interested 1953 // in the property, but it does not have a simple bit value to flip 1954 1955 Map<String, Integer> featureMap = new HashMap<>(); 1956 // cdata-sections 1957 featureMap.put( 1958 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_CDATA_SECTIONS, 1959 CDATA); 1960 1961 // comments 1962 featureMap.put( 1963 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_COMMENTS, 1964 COMMENTS); 1965 1966 // element-content-whitespace 1967 featureMap.put( 1968 DOMConstants.S_DOM3_PROPERTIES_NS 1969 + DOMConstants.DOM_ELEMENT_CONTENT_WHITESPACE, 1970 ELEM_CONTENT_WHITESPACE); 1971 1972 // entities 1973 featureMap.put( 1974 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_ENTITIES, 1975 ENTITIES); 1976 1977 // namespaces 1978 featureMap.put( 1979 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_NAMESPACES, 1980 NAMESPACES); 1981 1982 // namespace-declarations 1983 featureMap.put( 1984 
DOMConstants.S_DOM3_PROPERTIES_NS 1985 + DOMConstants.DOM_NAMESPACE_DECLARATIONS, 1986 NAMESPACEDECLS); 1987 1988 // split-cdata-sections 1989 featureMap.put( 1990 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_SPLIT_CDATA, 1991 SPLITCDATA); 1992 1993 // discard-default-content 1994 featureMap.put( 1995 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_WELLFORMED, 1996 WELLFORMED); 1997 1998 // discard-default-content 1999 featureMap.put( 2000 DOMConstants.S_DOM3_PROPERTIES_NS 2001 + DOMConstants.DOM_DISCARD_DEFAULT_CONTENT, 2002 DISCARDDEFAULT); 2003 2004 fFeatureMap = Collections.unmodifiableMap(featureMap); 2005 } 2006 2007 /** 2008 * Initializes fFeatures based on the DOMConfiguration Parameters set. 2009 * 2010 * @param properties DOMConfiguraiton properties that were set and which are 2011 * to be used while serializing the DOM. 2012 */ 2013 protected void initProperties(Properties properties) { 2014 for(String key : properties.stringPropertyNames()) { 2015 2016 // caonical-form 2017 // Other features will be enabled or disabled when this is set to true or false. 2018 2019 // error-handler; set via the constructor 2020 2021 // infoset 2022 // Other features will be enabled or disabled when this is set to true 2023 2024 // A quick lookup for the given set of properties (cdata-sections ...) 
2025 final Integer bitFlag = fFeatureMap.get(key); 2026 if (bitFlag != null) { 2027 // Dealing with a property that has a simple bit value that 2028 // we need to set 2029 2030 // cdata-sections 2031 // comments 2032 // element-content-whitespace 2033 // entities 2034 // namespaces 2035 // namespace-declarations 2036 // split-cdata-sections 2037 // well-formed 2038 // discard-default-content 2039 if ((properties.getProperty(key).endsWith("yes"))) { 2040 fFeatures = fFeatures | bitFlag; 2041 } else { 2042 fFeatures = fFeatures & ~bitFlag; 2043 } 2044 } else { 2045 /** 2046 * Other properties that have a bit more complex value 2047 * than the features in the above map. 2048 */ 2049 if ((DOMConstants.S_DOM3_PROPERTIES_NS 2050 + DOMConstants.DOM_FORMAT_PRETTY_PRINT) 2051 .equals(key)) { 2052 // format-pretty-print; set internally on the serializers via xsl:output properties in LSSerializer 2053 if ((properties.getProperty(key).endsWith("yes"))) { 2054 fSerializer.setIndent(true); 2055 fSerializer.setIndentAmount(4); 2056 } else { 2057 fSerializer.setIndent(false); 2058 } 2059 } else if ((DOMConstants.S_XSL_OUTPUT_OMIT_XML_DECL).equals(key)) { 2060 // omit-xml-declaration; set internally on the serializers via xsl:output properties in LSSerializer 2061 if ((properties.getProperty(key).endsWith("yes"))) { 2062 fSerializer.setOmitXMLDeclaration(true); 2063 } else { 2064 fSerializer.setOmitXMLDeclaration(false); 2065 } 2066 } else if ((DOMConstants.S_XERCES_PROPERTIES_NS 2067 + DOMConstants.S_XML_VERSION).equals(key)) { 2068 // Retreive the value of the XML Version attribute via the xml-version 2069 String version = properties.getProperty(key); 2070 if ("1.1".equals(version)) { 2071 fIsXMLVersion11 = true; 2072 fSerializer.setVersion(version); 2073 } else { 2074 fSerializer.setVersion("1.0"); 2075 } 2076 } else if ((DOMConstants.S_XSL_OUTPUT_ENCODING).equals(key)) { 2077 // Retreive the value of the XML Encoding attribute 2078 String encoding = properties.getProperty(key); 
2079 if (encoding != null) { 2080 fSerializer.setEncoding(encoding); 2081 } 2082 } else if ((OutputPropertiesFactory.S_KEY_ENTITIES).equals(key)) { 2083 // Retreive the value of the XML Encoding attribute 2084 String entities = properties.getProperty(key); 2085 if (DOMConstants.S_XSL_VALUE_ENTITIES.equals(entities)) { 2086 fSerializer.setDTDEntityExpansion(false); 2087 } 2088 } 2089 } 2090 } 2091 // Set the newLine character to use 2092 if (fNewLine != null) { 2093 fSerializer.setOutputProperty(OutputPropertiesFactory.S_KEY_LINE_SEPARATOR, fNewLine); 2094 } 2095 } 2096 2097 } //TreeWalker