1 /* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xml.internal.serializer.dom3; 22 23 import com.sun.org.apache.xerces.internal.util.XML11Char; 24 import com.sun.org.apache.xerces.internal.util.XMLChar; 25 import com.sun.org.apache.xml.internal.serializer.OutputPropertiesFactory; 26 import com.sun.org.apache.xml.internal.serializer.SerializationHandler; 27 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 28 import com.sun.org.apache.xml.internal.serializer.utils.Utils; 29 import java.io.IOException; 30 import java.io.Writer; 31 import java.util.Collections; 32 import java.util.Enumeration; 33 import java.util.HashMap; 34 import java.util.Map; 35 import java.util.Properties; 36 import org.w3c.dom.Attr; 37 import org.w3c.dom.CDATASection; 38 import org.w3c.dom.Comment; 39 import org.w3c.dom.DOMError; 40 import org.w3c.dom.DOMErrorHandler; 41 import org.w3c.dom.Document; 42 import org.w3c.dom.DocumentType; 43 import org.w3c.dom.Element; 44 import org.w3c.dom.Entity; 45 import org.w3c.dom.EntityReference; 46 import org.w3c.dom.NamedNodeMap; 47 import org.w3c.dom.Node; 48 import 
org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
import org.w3c.dom.ls.LSSerializerFilter;
import org.w3c.dom.traversal.NodeFilter;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.LocatorImpl;

/**
 * Built on org.apache.xml.serializer.TreeWalker and adds functionality to
 * traverse and serialize a DOM Node (Level 2 or Level 3) as specified in
 * the DOM Level 3 LS Recommendation by evaluating and applying DOMConfiguration
 * parameters and filters, if any, during serialization.
 *
 * <p>The walker turns DOM nodes into calls on a {@link SerializationHandler}
 * (and, when that handler is also a {@link LexicalHandler}, into lexical
 * events such as comments, CDATA boundaries and entity boundaries).
 *
 * @xsl.usage internal
 */
final class DOM3TreeWalker {

    /**
     * The SerializationHandler, it extends ContentHandler and when
     * this class is instantiated via the constructor provided, a
     * SerializationHandler object is passed to it.
     */
    private SerializationHandler fSerializer = null;

    // We do not need DOM2Helper since DOM Level 3 LS applies to DOM Level 2 or newer

    /** Locator object for this TreeWalker; refreshed for every node in startNode. */
    private LocatorImpl fLocator = new LocatorImpl();

    /** ErrorHandler; may be null, in which case errors are not reported. */
    private DOMErrorHandler fErrorHandler = null;

    /** LSSerializerFilter; may be null, in which case every node is accepted. */
    private LSSerializerFilter fFilter = null;

    /** The serializer viewed as a LexicalHandler, or null if it is not one (set in traverse). */
    private LexicalHandler fLexicalHandler = null;

    /** Cached result of fFilter.getWhatToShow(), captured at the start of a traversal. */
    private int fWhatToShowFilter;

    /** New Line character to use in serialization */
    private String fNewLine = null;

    /** DOMConfiguration Properties */
    private Properties fDOMConfigProperties = null;

    /** Keeps track if we are in an entity reference when entities=true */
    private boolean fInEntityRef = false;

    /** Stores the version of the XML document to be serialized */
    private String fXMLVersion = null;

    /** True when the document is XML 1.1; default is XML 1.0 (false). */
    private boolean fIsXMLVersion11 = false;

    /** Is the Node a Level 3 DOM node */
    private boolean fIsLevel3DOM = false;

    /** DOM Configuration Parameters, stored as a bit set of the flag constants below. */
    private int fFeatures = 0;

    /**
     * Flag indicating whether following text to be processed is raw text.
     * Set by serializePI on an "xslt-next-is-raw" instruction and cleared by
     * serializeText when it emits the next text node.
     */
    boolean fNextIsRaw = false;

    // Namespace URI bound to the "xmlns" prefix (namespace declaration attributes)
    private static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/";

    // Prefix / attribute name used for namespace declarations
    private static final String XMLNS_PREFIX = "xmlns";

    // Namespace URI bound to the reserved "xml" prefix
    private static final String XML_URI = "http://www.w3.org/XML/1998/namespace";

    // The reserved "xml" prefix
    private static final String XML_PREFIX = "xml";

    /** stores namespaces in scope */
    protected NamespaceSupport fNSBinder;

    /** stores all namespace bindings on the current element */
    protected NamespaceSupport fLocalNSBinder;

    /** stores the current element depth (incremented/decremented in serializeElement) */
    private int fElementDepth = 0;

    // ***********************************************************************
    // DOMConfiguration parameter settings
    // Each constant is one bit of fFeatures.
    // ***********************************************************************
    // Parameter canonical-form, true [optional] - NOT SUPPORTED
    private final static int CANONICAL = 0x1 << 0;

    // Parameter cdata-sections, true [required] (default)
    private final static int CDATA = 0x1 << 1;

    // Parameter check-character-normalization, true [optional] - NOT SUPPORTED
    private final static int CHARNORMALIZE = 0x1 << 2;

    // Parameter comments, true [required] (default)
    private final static int COMMENTS = 0x1 << 3;

    // Parameter datatype-normalization, true [optional] - NOT SUPPORTED
    private final static int DTNORMALIZE = 0x1 << 4;

    // Parameter element-content-whitespace, true [required] (default) - value - false [optional] NOT SUPPORTED
    private final static int ELEM_CONTENT_WHITESPACE = 0x1 << 5;

    // Parameter entities, true [required] (default)
    private final static int ENTITIES = 0x1 << 6;

    // Parameter infoset, true [required] (default), false has no effect --> True
has no effect for the serializer 161 private final static int INFOSET = 0x1 << 7; 162 163 // Parameter namespaces, true [required] (default) 164 private final static int NAMESPACES = 0x1 << 8; 165 166 // Parameter namespace-declarations, true [required] (default) 167 private final static int NAMESPACEDECLS = 0x1 << 9; 168 169 // Parameter normalize-characters, true [optional] - NOT SUPPORTED 170 private final static int NORMALIZECHARS = 0x1 << 10; 171 172 // Parameter split-cdata-sections, true [required] (default) 173 private final static int SPLITCDATA = 0x1 << 11; 174 175 // Parameter validate, true [optional] - NOT SUPPORTED 176 private final static int VALIDATE = 0x1 << 12; 177 178 // Parameter validate-if-schema, true [optional] - NOT SUPPORTED 179 private final static int SCHEMAVALIDATE = 0x1 << 13; 180 181 // Parameter split-cdata-sections, true [required] (default) 182 private final static int WELLFORMED = 0x1 << 14; 183 184 // Parameter discard-default-content, true [required] (default) 185 // Not sure how this will be used in level 2 Documents 186 private final static int DISCARDDEFAULT = 0x1 << 15; 187 188 // Parameter format-pretty-print, true [optional] 189 private final static int PRETTY_PRINT = 0x1 << 16; 190 191 // Parameter ignore-unknown-character-denormalizations, true [required] (default) 192 // We currently do not support XML 1.1 character normalization 193 private final static int IGNORE_CHAR_DENORMALIZE = 0x1 << 17; 194 195 // Parameter discard-default-content, true [required] (default) 196 private final static int XMLDECL = 0x1 << 18; 197 198 /** 199 * Constructor. 200 * @param contentHandler serialHandler The implemention of the SerializationHandler interface 201 */ 202 DOM3TreeWalker( 203 SerializationHandler serialHandler, 204 DOMErrorHandler errHandler, 205 LSSerializerFilter filter, 206 String newLine) { 207 fSerializer = serialHandler; 208 //fErrorHandler = errHandler == null ? 
new DOMErrorHandlerImpl() : errHandler; // Should we be using the default? 209 fErrorHandler = errHandler; 210 fFilter = filter; 211 fLexicalHandler = null; 212 fNewLine = newLine; 213 214 fNSBinder = new NamespaceSupport(); 215 fLocalNSBinder = new NamespaceSupport(); 216 217 fDOMConfigProperties = fSerializer.getOutputFormat(); 218 fSerializer.setDocumentLocator(fLocator); 219 initProperties(fDOMConfigProperties); 220 } 221 222 /** 223 * Perform a pre-order traversal non-recursive style. 224 * 225 * Note that TreeWalker assumes that the subtree is intended to represent 226 * a complete (though not necessarily well-formed) document and, during a 227 * traversal, startDocument and endDocument will always be issued to the 228 * SAX listener. 229 * 230 * @param pos Node in the tree where to start traversal 231 * 232 * @throws TransformerException 233 */ 234 public void traverse(Node pos) throws org.xml.sax.SAXException { 235 this.fSerializer.startDocument(); 236 237 // Determine if the Node is a DOM Level 3 Core Node. 
238 if (pos.getNodeType() != Node.DOCUMENT_NODE) { 239 Document ownerDoc = pos.getOwnerDocument(); 240 if (ownerDoc != null 241 && ownerDoc.getImplementation().hasFeature("Core", "3.0")) { 242 fIsLevel3DOM = true; 243 } 244 } else { 245 if (((Document) pos) 246 .getImplementation() 247 .hasFeature("Core", "3.0")) { 248 fIsLevel3DOM = true; 249 } 250 } 251 252 if (fSerializer instanceof LexicalHandler) { 253 fLexicalHandler = ((LexicalHandler) this.fSerializer); 254 } 255 256 if (fFilter != null) 257 fWhatToShowFilter = fFilter.getWhatToShow(); 258 259 Node top = pos; 260 261 while (null != pos) { 262 startNode(pos); 263 264 Node nextNode = null; 265 266 nextNode = pos.getFirstChild(); 267 268 while (null == nextNode) { 269 endNode(pos); 270 271 if (top.equals(pos)) 272 break; 273 274 nextNode = pos.getNextSibling(); 275 276 if (null == nextNode) { 277 pos = pos.getParentNode(); 278 279 if ((null == pos) || (top.equals(pos))) { 280 if (null != pos) 281 endNode(pos); 282 283 nextNode = null; 284 285 break; 286 } 287 } 288 } 289 290 pos = nextNode; 291 } 292 this.fSerializer.endDocument(); 293 } 294 295 /** 296 * Perform a pre-order traversal non-recursive style. 297 298 * Note that TreeWalker assumes that the subtree is intended to represent 299 * a complete (though not necessarily well-formed) document and, during a 300 * traversal, startDocument and endDocument will always be issued to the 301 * SAX listener. 302 * 303 * @param pos Node in the tree where to start traversal 304 * @param top Node in the tree where to end traversal 305 * 306 * @throws TransformerException 307 */ 308 public void traverse(Node pos, Node top) throws org.xml.sax.SAXException { 309 310 this.fSerializer.startDocument(); 311 312 // Determine if the Node is a DOM Level 3 Core Node. 
313 if (pos.getNodeType() != Node.DOCUMENT_NODE) { 314 Document ownerDoc = pos.getOwnerDocument(); 315 if (ownerDoc != null 316 && ownerDoc.getImplementation().hasFeature("Core", "3.0")) { 317 fIsLevel3DOM = true; 318 } 319 } else { 320 if (((Document) pos) 321 .getImplementation() 322 .hasFeature("Core", "3.0")) { 323 fIsLevel3DOM = true; 324 } 325 } 326 327 if (fSerializer instanceof LexicalHandler) { 328 fLexicalHandler = ((LexicalHandler) this.fSerializer); 329 } 330 331 if (fFilter != null) 332 fWhatToShowFilter = fFilter.getWhatToShow(); 333 334 while (null != pos) { 335 startNode(pos); 336 337 Node nextNode = null; 338 339 nextNode = pos.getFirstChild(); 340 341 while (null == nextNode) { 342 endNode(pos); 343 344 if ((null != top) && top.equals(pos)) 345 break; 346 347 nextNode = pos.getNextSibling(); 348 349 if (null == nextNode) { 350 pos = pos.getParentNode(); 351 352 if ((null == pos) || ((null != top) && top.equals(pos))) { 353 nextNode = null; 354 355 break; 356 } 357 } 358 } 359 360 pos = nextNode; 361 } 362 this.fSerializer.endDocument(); 363 } 364 365 /** 366 * Optimized dispatch of characters. 
367 */ 368 private final void dispatachChars(Node node) 369 throws org.xml.sax.SAXException { 370 if (fSerializer != null) { 371 this.fSerializer.characters(node); 372 } else { 373 String data = ((Text) node).getData(); 374 this.fSerializer.characters(data.toCharArray(), 0, data.length()); 375 } 376 } 377 378 /** 379 * Start processing given node 380 * 381 * @param node Node to process 382 * 383 * @throws org.xml.sax.SAXException 384 */ 385 protected void startNode(Node node) throws org.xml.sax.SAXException { 386 if (node instanceof Locator) { 387 Locator loc = (Locator) node; 388 fLocator.setColumnNumber(loc.getColumnNumber()); 389 fLocator.setLineNumber(loc.getLineNumber()); 390 fLocator.setPublicId(loc.getPublicId()); 391 fLocator.setSystemId(loc.getSystemId()); 392 } else { 393 fLocator.setColumnNumber(0); 394 fLocator.setLineNumber(0); 395 } 396 397 switch (node.getNodeType()) { 398 case Node.DOCUMENT_TYPE_NODE : 399 serializeDocType((DocumentType) node, true); 400 break; 401 case Node.COMMENT_NODE : 402 serializeComment((Comment) node); 403 break; 404 case Node.DOCUMENT_FRAGMENT_NODE : 405 // Children are traversed 406 break; 407 case Node.DOCUMENT_NODE : 408 break; 409 case Node.ELEMENT_NODE : 410 serializeElement((Element) node, true); 411 break; 412 case Node.PROCESSING_INSTRUCTION_NODE : 413 serializePI((ProcessingInstruction) node); 414 break; 415 case Node.CDATA_SECTION_NODE : 416 serializeCDATASection((CDATASection) node); 417 break; 418 case Node.TEXT_NODE : 419 serializeText((Text) node); 420 break; 421 case Node.ENTITY_REFERENCE_NODE : 422 serializeEntityReference((EntityReference) node, true); 423 break; 424 default : 425 } 426 } 427 428 /** 429 * End processing of given node 430 * 431 * 432 * @param node Node we just finished processing 433 * 434 * @throws org.xml.sax.SAXException 435 */ 436 protected void endNode(Node node) throws org.xml.sax.SAXException { 437 438 switch (node.getNodeType()) { 439 case Node.DOCUMENT_NODE : 440 break; 441 case 
Node.DOCUMENT_TYPE_NODE : 442 serializeDocType((DocumentType) node, false); 443 break; 444 case Node.ELEMENT_NODE : 445 serializeElement((Element) node, false); 446 break; 447 case Node.CDATA_SECTION_NODE : 448 break; 449 case Node.ENTITY_REFERENCE_NODE : 450 serializeEntityReference((EntityReference) node, false); 451 break; 452 default : 453 } 454 } 455 456 // *********************************************************************** 457 // Node serialization methods 458 // *********************************************************************** 459 /** 460 * Applies a filter on the node to serialize 461 * 462 * @param node The Node to serialize 463 * @return True if the node is to be serialized else false if the node 464 * is to be rejected or skipped. 465 */ 466 protected boolean applyFilter(Node node, int nodeType) { 467 if (fFilter != null && (fWhatToShowFilter & nodeType) != 0) { 468 469 short code = fFilter.acceptNode(node); 470 switch (code) { 471 case NodeFilter.FILTER_REJECT : 472 case NodeFilter.FILTER_SKIP : 473 return false; // skip the node 474 default : // fall through.. 475 } 476 } 477 return true; 478 } 479 480 /** 481 * Serializes a Document Type Node. 482 * 483 * @param node The Docuemnt Type Node to serialize 484 * @param bStart Invoked at the start or end of node. Default true. 485 */ 486 protected void serializeDocType(DocumentType node, boolean bStart) 487 throws SAXException { 488 // The DocType and internalSubset can not be modified in DOM and is 489 // considered to be well-formed as the outcome of successful parsing. 
490 String docTypeName = node.getNodeName(); 491 String publicId = node.getPublicId(); 492 String systemId = node.getSystemId(); 493 String internalSubset = node.getInternalSubset(); 494 495 //DocumentType nodes are never passed to the filter 496 497 if (internalSubset != null && !"".equals(internalSubset)) { 498 499 if (bStart) { 500 try { 501 // The Serializer does not provide a way to write out the 502 // DOCTYPE internal subset via an event call, so we write it 503 // out here. 504 Writer writer = fSerializer.getWriter(); 505 StringBuffer dtd = new StringBuffer(); 506 507 dtd.append("<!DOCTYPE "); 508 dtd.append(docTypeName); 509 if (null != publicId) { 510 dtd.append(" PUBLIC \""); 511 dtd.append(publicId); 512 dtd.append('\"'); 513 } 514 515 if (null != systemId) { 516 if (null == publicId) { 517 dtd.append(" SYSTEM \""); 518 } else { 519 dtd.append(" \""); 520 } 521 dtd.append(systemId); 522 dtd.append('\"'); 523 } 524 525 dtd.append(" [ "); 526 527 dtd.append(fNewLine); 528 dtd.append(internalSubset); 529 dtd.append("]>"); 530 dtd.append(fNewLine); 531 532 writer.write(dtd.toString()); 533 writer.flush(); 534 535 } catch (IOException e) { 536 throw new SAXException(Utils.messages.createMessage( 537 MsgKey.ER_WRITING_INTERNAL_SUBSET, null), e); 538 } 539 } // else if !bStart do nothing 540 541 } else { 542 543 if (bStart) { 544 if (fLexicalHandler != null) { 545 fLexicalHandler.startDTD(docTypeName, publicId, systemId); 546 } 547 } else { 548 if (fLexicalHandler != null) { 549 fLexicalHandler.endDTD(); 550 } 551 } 552 } 553 } 554 555 /** 556 * Serializes a Comment Node. 
*
     * <p>Nothing is emitted unless comments=true. The comment is delivered
     * through the LexicalHandler, so it is also dropped when the serializer
     * is not a LexicalHandler; the filter is consulted only in the case
     * where a LexicalHandler exists.
     *
     * @param node The Comment Node to serialize
     * @throws SAXException if the lexical handler reports a failure
     */
    protected void serializeComment(Comment node) throws SAXException {
        // comments=true
        if ((fFeatures & COMMENTS) != 0) {
            String data = node.getData();

            // well-formed=true
            if ((fFeatures & WELLFORMED) != 0) {
                isCommentWellFormed(data);
            }

            if (fLexicalHandler != null) {
                // apply the LSSerializer filter after the operations requested by the
                // DOMConfiguration parameters have been applied
                if (!applyFilter(node, NodeFilter.SHOW_COMMENT)) {
                    return;
                }

                fLexicalHandler.comment(data.toCharArray(), 0, data.length());
            }
        }
    }

    /**
     * Serializes an Element Node.
     *
     * <p>On start: runs the well-formedness check, consults the filter, does
     * namespace recording/fix-up when namespaces=true, then emits
     * startElement followed by the attribute list. On end: consults the
     * filter again, emits endElement and pops the namespace context.
     *
     * <p>The element depth counter is adjusted before the filter is asked,
     * so filtered elements still count towards the depth.
     *
     * @param node The Element Node to serialize
     * @param bStart Invoked at the start or end of node.
     * @throws SAXException if the serializer reports a failure
     */
    protected void serializeElement(Element node, boolean bStart)
        throws SAXException {
        if (bStart) {
            fElementDepth++;

            // We use the Xalan specific startElement and startPrefixMapping calls
            // (and addAttribute and namespaceAfterStartElement) as opposed to
            // SAX specific, for performance reasons as they reduce the overhead
            // of creating an AttList object upfront.

            // well-formed=true
            if ((fFeatures & WELLFORMED) != 0) {
                isElementWellFormed(node);
            }

            // REVISIT: We apply the LSSerializer filter for elements before
            // namespace fixup
            if (!applyFilter(node, NodeFilter.SHOW_ELEMENT)) {
                return;
            }

            // namespaces=true, record and fixup namespaced element
            if ((fFeatures & NAMESPACES) != 0) {
                fNSBinder.pushContext();
                fLocalNSBinder.reset();

                recordLocalNSDecl(node);
                fixupElementNS(node);
            }

            // Namespace normalization
            fSerializer.startElement(
                    node.getNamespaceURI(),
                    node.getLocalName(),
                    node.getNodeName());

            serializeAttList(node);

        } else {
            fElementDepth--;

            // apply the LSSerializer filter
            // NOTE(review): the push above and the pop below stay balanced only
            // if the filter gives the same answer for this element on the start
            // and end calls -- confirm this is guaranteed by callers.
            if (!applyFilter(node, NodeFilter.SHOW_ELEMENT)) {
                return;
            }

            this.fSerializer.endElement(
                    node.getNamespaceURI(),
                    node.getLocalName(),
                    node.getNodeName());
            // since endPrefixMapping was not used by SerializationHandler it was removed
            // for performance reasons.

            if ((fFeatures & NAMESPACES) != 0 ) {
                fNSBinder.popContext();
            }

        }
    }

    /**
     * Serializes the Attr Nodes of an Element.
     *
     * <p>For every attribute, in order: determine its type (schema type name
     * on Level 3 DOMs, otherwise "CDATA"); run the well-formedness check;
     * when namespaces=true, fix up the prefix of non-xmlns attributes,
     * possibly adding an xmlns declaration; decide whether the attribute is
     * kept (discard-default-content and the filter); add it to the
     * serializer; and finally report prefixed xmlns declarations through
     * namespaceAfterStartElement.
     *
     * @param node The OwnerElement whose Attr Nodes are to be serialized.
     * @throws SAXException if the serializer reports a failure
     */
    protected void serializeAttList(Element node) throws SAXException {
        NamedNodeMap atts = node.getAttributes();
        int nAttrs = atts.getLength();

        for (int i = 0; i < nAttrs; i++) {
            Node attr = atts.item(i);

            String localName = attr.getLocalName();
            String attrName = attr.getNodeName();
            String attrPrefix = attr.getPrefix() == null ? "" : attr.getPrefix();
            String attrValue = attr.getNodeValue();

            // Determine the Attr's type.
            String type = null;
            if (fIsLevel3DOM) {
                type = ((Attr) attr).getSchemaTypeInfo().getTypeName();
            }
            type = type == null ? "CDATA" : type;

            // An empty namespace URI is treated like "no namespace".
            String attrNS = attr.getNamespaceURI();
            if (attrNS !=null && attrNS.length() == 0) {
                attrNS=null;
                // we must remove prefix for this attribute
                attrName=attr.getLocalName();
            }

            boolean isSpecified = ((Attr) attr).getSpecified();
            boolean addAttr = true;
            boolean applyFilter = false;
            // true for "xmlns" and "xmlns:prefix" namespace declaration attributes
            boolean xmlnsAttr =
                attrName.equals("xmlns") || attrName.startsWith("xmlns:");

            // well-formed=true
            if ((fFeatures & WELLFORMED) != 0) {
                isAttributeWellFormed(attr);
            }

            //-----------------------------------------------------------------
            // start Attribute namespace fixup
            //-----------------------------------------------------------------
            // namespaces=true, normalize all non-namespace attributes
            // Step 3. Attribute
            if ((fFeatures & NAMESPACES) != 0 && !xmlnsAttr) {

                // If the Attr has a namespace URI
                if (attrNS != null) {
                    // attrPrefix was already defaulted to "" above; this is a no-op guard
                    attrPrefix = attrPrefix == null ? "" : attrPrefix;

                    // prefix currently bound to the attribute's namespace, and
                    // namespace currently bound to the attribute's prefix
                    String declAttrPrefix = fNSBinder.getPrefix(attrNS);
                    String declAttrNS = fNSBinder.getURI(attrPrefix);

                    // attribute has no prefix (default namespace decl does not apply to
                    // attributes)
                    // OR
                    // attribute prefix is not declared
                    // OR
                    // conflict: attribute has a prefix that conflicts with a binding
                    if ("".equals(attrPrefix) || "".equals(declAttrPrefix)
                            || !attrPrefix.equals(declAttrPrefix)) {

                        // namespaceURI matches an in scope declaration of one or
                        // more prefixes
                        if (declAttrPrefix != null && !"".equals(declAttrPrefix)) {
                            // pick the prefix that was found and change attribute's
                            // prefix and nodeName.
                            attrPrefix = declAttrPrefix;

                            // the enclosing test already excludes "", so the else
                            // branch below is not reached
                            if (declAttrPrefix.length() > 0 ) {
                                attrName = declAttrPrefix + ":" + localName;
                            } else {
                                attrName = localName;
                            }
                        } else {
                            // The current prefix is not null and it has no in scope
                            // declaration
                            if (attrPrefix != null && !"".equals(attrPrefix)
                                    && declAttrNS == null) {
                                // declare this prefix
                                if ((fFeatures & NAMESPACEDECLS) != 0) {
                                    fSerializer.addAttribute(XMLNS_URI, attrPrefix,
                                            XMLNS_PREFIX + ":" + attrPrefix, "CDATA",
                                            attrNS);
                                    fNSBinder.declarePrefix(attrPrefix, attrNS);
                                    fLocalNSBinder.declarePrefix(attrPrefix, attrNS);
                                }
                            } else {
                                // find a prefix following the pattern "NS" +index
                                // (starting at 1)
                                // make sure this prefix is not declared in the current
                                // scope.
                                int counter = 1;
                                attrPrefix = "NS" + counter++;

                                while (fLocalNSBinder.getURI(attrPrefix) != null) {
                                    attrPrefix = "NS" + counter++;
                                }
                                // change attribute's prefix and Name
                                attrName = attrPrefix + ":" + localName;

                                // create a local namespace declaration attribute
                                // Add the xmlns declaration attribute
                                if ((fFeatures & NAMESPACEDECLS) != 0) {

                                    fSerializer.addAttribute(XMLNS_URI, attrPrefix,
                                            XMLNS_PREFIX + ":" + attrPrefix, "CDATA",
                                            attrNS);
                                    fNSBinder.declarePrefix(attrPrefix, attrNS);
                                    fLocalNSBinder.declarePrefix(attrPrefix, attrNS);
                                }
                            }
                        }
                    }

                } else { // if the Attr has no namespace URI
                    // Attr has no localName
                    if (localName == null) {
                        // DOM Level 1 node!
                        String msg = Utils.messages.createMessage(
                                MsgKey.ER_NULL_LOCAL_ELEMENT_NAME,
                                new Object[] { attrName });

                        if (fErrorHandler != null) {
                            fErrorHandler
                                    .handleError(new DOMErrorImpl(
                                            DOMError.SEVERITY_ERROR, msg,
                                            MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, null,
                                            null, null));
                        }

                    } else { // uri=null and no colon
                        // attr has no namespace URI and no prefix
                        // no action is required, since attrs don't use default
                    }
                }

            }


            // discard-default-content=true
            // Default attrs are not passed to the filter and this constraint
            // is applied only when discard-default-content=true
            // What about default xmlns attributes???? check for xmlnsAttr
            // Net effect: an unspecified (defaulted) attribute is dropped when
            // discard-default-content=true; every other attribute goes on to
            // the filter.
            if ((((fFeatures & DISCARDDEFAULT) != 0) && isSpecified)
                    || ((fFeatures & DISCARDDEFAULT) == 0)) {
                applyFilter = true;
            } else {
                addAttr = false;
            }

            if (applyFilter) {
                // apply the filter for Attributes that are not default attributes
                // or namespace decl attributes
                // (whatToShow is read from the filter directly here, not from
                // the fWhatToShowFilter cache used by applyFilter(Node, int))
                if (fFilter != null
                        && (fFilter.getWhatToShow() & NodeFilter.SHOW_ATTRIBUTE)
                                != 0) {

                    if (!xmlnsAttr) {
                        short code = fFilter.acceptNode(attr);
                        switch (code) {
                            case NodeFilter.FILTER_REJECT :
                            case NodeFilter.FILTER_SKIP :
                                addAttr = false;
                                break;
                            default : //fall through..
                        }
                    }
                }
            }

            // if the node is a namespace node
            if (addAttr && xmlnsAttr) {
                // If namespace-declarations=true, add the node , else don't add it
                if ((fFeatures & NAMESPACEDECLS) != 0) {
                    // The namespace may have been fixed up, in that case don't add it.
                    if (localName != null && !"".equals(localName)) {
                        fSerializer.addAttribute(attrNS, localName, attrName, type, attrValue);
                    }
                }
            } else if (
                    addAttr && !xmlnsAttr) { // if the node is not a namespace node
                // If namespace-declarations=true, add the node with the Attr nodes namespaceURI
                // else add the node setting its namespace to null or else the serializer will later
                // attempt to add a xmlns attr for the prefixed attribute
                if (((fFeatures & NAMESPACEDECLS) != 0) && (attrNS != null)) {
                    fSerializer.addAttribute(
                            attrNS,
                            localName,
                            attrName,
                            type,
                            attrValue);
                } else {
                    fSerializer.addAttribute(
                            "",
                            localName,
                            attrName,
                            type,
                            attrValue);
                }
            }

            // Report prefixed namespace declarations to the serializer. This runs
            // whenever the attribute is an xmlns attribute and
            // namespace-declarations=true, independent of addAttr.
            if (xmlnsAttr && ((fFeatures & NAMESPACEDECLS) != 0)) {
                int index;
                // Use "" instead of null, as Xerces likes "" for the
                // name of the default namespace. Fix attributed
                // to "Steven Murray" <smurray@ebt.com>.
                String prefix =
                    (index = attrName.indexOf(":")) < 0
                        ? ""
                        : attrName.substring(index + 1);

                // the default namespace declaration ("xmlns") is not reported here
                if (!"".equals(prefix)) {
                    fSerializer.namespaceAfterStartElement(prefix, attrValue);
                }
            }
        }

    }

    /**
     * Serializes an ProcessingInstruction Node.
     *
     * <p>A processing instruction named "xslt-next-is-raw" is not written;
     * it only sets {@code fNextIsRaw} so that the next text node is emitted
     * with output escaping disabled (see serializeText).
     *
     * @param node The ProcessingInstruction Node to serialize
     * @throws SAXException if the serializer reports a failure
     */
    protected void serializePI(ProcessingInstruction node)
        throws SAXException {
        ProcessingInstruction pi = node;
        String name = pi.getNodeName();

        // well-formed=true
        if ((fFeatures & WELLFORMED) != 0) {
            isPIWellFormed(node);
        }

        // apply the LSSerializer filter
        if (!applyFilter(node, NodeFilter.SHOW_PROCESSING_INSTRUCTION)) {
            return;
        }

        // String data = pi.getData();
        if (name.equals("xslt-next-is-raw")) {
            fNextIsRaw = true;
        } else {
            this.fSerializer.processingInstruction(name, pi.getData());
        }
    }

    /**
     * Serializes an CDATASection Node.
902 * 903 * @param node The CDATASection Node to serialize 904 */ 905 protected void serializeCDATASection(CDATASection node) 906 throws SAXException { 907 // well-formed=true 908 if ((fFeatures & WELLFORMED) != 0) { 909 isCDATASectionWellFormed(node); 910 } 911 912 // cdata-sections = true 913 if ((fFeatures & CDATA) != 0) { 914 915 // split-cdata-sections = true 916 // Assumption: This parameter has an effect only when 917 // cdata-sections=true 918 // ToStream, by default splits cdata-sections. Hence the check 919 // below. 920 String nodeValue = node.getNodeValue(); 921 int endIndex = nodeValue.indexOf("]]>"); 922 if ((fFeatures & SPLITCDATA) != 0) { 923 if (endIndex >= 0) { 924 // The first node split will contain the ]] markers 925 String relatedData = nodeValue.substring(0, endIndex + 2); 926 927 String msg = 928 Utils.messages.createMessage( 929 MsgKey.ER_CDATA_SECTIONS_SPLIT, 930 null); 931 932 if (fErrorHandler != null) { 933 fErrorHandler.handleError( 934 new DOMErrorImpl( 935 DOMError.SEVERITY_WARNING, 936 msg, 937 MsgKey.ER_CDATA_SECTIONS_SPLIT, 938 null, 939 relatedData, 940 null)); 941 } 942 } 943 } else { 944 if (endIndex >= 0) { 945 // The first node split will contain the ]] markers 946 String relatedData = nodeValue.substring(0, endIndex + 2); 947 948 String msg = 949 Utils.messages.createMessage( 950 MsgKey.ER_CDATA_SECTIONS_SPLIT, 951 null); 952 953 if (fErrorHandler != null) { 954 fErrorHandler.handleError( 955 new DOMErrorImpl( 956 DOMError.SEVERITY_ERROR, 957 msg, 958 MsgKey.ER_CDATA_SECTIONS_SPLIT)); 959 } 960 // Report an error and return. What error??? 
961 return; 962 } 963 } 964 965 // apply the LSSerializer filter 966 if (!applyFilter(node, NodeFilter.SHOW_CDATA_SECTION)) { 967 return; 968 } 969 970 // splits the cdata-section 971 if (fLexicalHandler != null) { 972 fLexicalHandler.startCDATA(); 973 } 974 dispatachChars(node); 975 if (fLexicalHandler != null) { 976 fLexicalHandler.endCDATA(); 977 } 978 } else { 979 dispatachChars(node); 980 } 981 } 982 983 /** 984 * Serializes an Text Node. 985 * 986 * @param node The Text Node to serialize 987 */ 988 protected void serializeText(Text node) throws SAXException { 989 if (fNextIsRaw) { 990 fNextIsRaw = false; 991 fSerializer.processingInstruction( 992 javax.xml.transform.Result.PI_DISABLE_OUTPUT_ESCAPING, 993 ""); 994 dispatachChars(node); 995 fSerializer.processingInstruction( 996 javax.xml.transform.Result.PI_ENABLE_OUTPUT_ESCAPING, 997 ""); 998 } else { 999 // keep track of dispatch or not to avoid duplicaiton of filter code 1000 boolean bDispatch = false; 1001 1002 // well-formed=true 1003 if ((fFeatures & WELLFORMED) != 0) { 1004 isTextWellFormed(node); 1005 } 1006 1007 // if the node is whitespace 1008 // Determine the Attr's type. 1009 boolean isElementContentWhitespace = false; 1010 if (fIsLevel3DOM) { 1011 isElementContentWhitespace = 1012 node.isElementContentWhitespace(); 1013 } 1014 1015 if (isElementContentWhitespace) { 1016 // element-content-whitespace=true 1017 if ((fFeatures & ELEM_CONTENT_WHITESPACE) != 0) { 1018 bDispatch = true; 1019 } 1020 } else { 1021 bDispatch = true; 1022 } 1023 1024 // apply the LSSerializer filter 1025 if (!applyFilter(node, NodeFilter.SHOW_TEXT)) { 1026 return; 1027 } 1028 1029 if (bDispatch) { 1030 dispatachChars(node); 1031 } 1032 } 1033 } 1034 1035 /** 1036 * Serializes an EntityReference Node. 
1037 * 1038 * @param node The EntityReference Node to serialize 1039 * @param bStart Inicates if called from start or endNode 1040 */ 1041 protected void serializeEntityReference( 1042 EntityReference node, 1043 boolean bStart) 1044 throws SAXException { 1045 if (bStart) { 1046 EntityReference eref = node; 1047 // entities=true 1048 if ((fFeatures & ENTITIES) != 0) { 1049 1050 // perform well-formedness and other checking only if 1051 // entities = true 1052 1053 // well-formed=true 1054 if ((fFeatures & WELLFORMED) != 0) { 1055 isEntityReferneceWellFormed(node); 1056 } 1057 1058 // check "unbound-prefix-in-entity-reference" [fatal] 1059 // Raised if the configuration parameter "namespaces" is set to true 1060 if ((fFeatures & NAMESPACES) != 0) { 1061 checkUnboundPrefixInEntRef(node); 1062 } 1063 1064 // The filter should not apply in this case, since the 1065 // EntityReference is not being expanded. 1066 // should we pass entity reference nodes to the filter??? 1067 } 1068 1069 if (fLexicalHandler != null) { 1070 1071 // startEntity outputs only Text but not Element, Attr, Comment 1072 // and PI child nodes. It does so by setting the m_inEntityRef 1073 // in ToStream and using this to decide if a node is to be 1074 // serialized or not. 
1075 fLexicalHandler.startEntity(eref.getNodeName()); 1076 } 1077 1078 } else { 1079 EntityReference eref = node; 1080 // entities=true or false, 1081 if (fLexicalHandler != null) { 1082 fLexicalHandler.endEntity(eref.getNodeName()); 1083 } 1084 } 1085 } 1086 1087 1088 // *********************************************************************** 1089 // Methods to check well-formedness 1090 // *********************************************************************** 1091 /** 1092 * Taken from org.apache.xerces.dom.CoreDocumentImpl 1093 * 1094 * Check the string against XML's definition of acceptable names for 1095 * elements and attributes and so on using the XMLCharacterProperties 1096 * utility class 1097 */ 1098 protected boolean isXMLName(String s, boolean xml11Version) { 1099 1100 if (s == null) { 1101 return false; 1102 } 1103 if (!xml11Version) 1104 return XMLChar.isValidName(s); 1105 else 1106 return XML11Char.isXML11ValidName(s); 1107 } 1108 1109 /** 1110 * Taken from org.apache.xerces.dom.CoreDocumentImpl 1111 * 1112 * Checks if the given qualified name is legal with respect 1113 * to the version of XML to which this document must conform. 
1114 * 1115 * @param prefix prefix of qualified name 1116 * @param local local part of qualified name 1117 */ 1118 protected boolean isValidQName( 1119 String prefix, 1120 String local, 1121 boolean xml11Version) { 1122 1123 // check that both prefix and local part match NCName 1124 if (local == null) 1125 return false; 1126 boolean validNCName = false; 1127 1128 if (!xml11Version) { 1129 validNCName = 1130 (prefix == null || XMLChar.isValidNCName(prefix)) 1131 && XMLChar.isValidNCName(local); 1132 } else { 1133 validNCName = 1134 (prefix == null || XML11Char.isXML11ValidNCName(prefix)) 1135 && XML11Char.isXML11ValidNCName(local); 1136 } 1137 1138 return validNCName; 1139 } 1140 1141 /** 1142 * Checks if a XML character is well-formed 1143 * 1144 * @param characters A String of characters to be checked for Well-Formedness 1145 * @param refInvalidChar A reference to the character to be returned that was determined invalid. 1146 */ 1147 protected boolean isWFXMLChar(String chardata, Character refInvalidChar) { 1148 if (chardata == null || (chardata.length() == 0)) { 1149 return true; 1150 } 1151 1152 char[] dataarray = chardata.toCharArray(); 1153 int datalength = dataarray.length; 1154 1155 // version of the document is XML 1.1 1156 if (fIsXMLVersion11) { 1157 //we need to check all characters as per production rules of XML11 1158 int i = 0; 1159 while (i < datalength) { 1160 if (XML11Char.isXML11Invalid(dataarray[i++])) { 1161 // check if this is a supplemental character 1162 char ch = dataarray[i - 1]; 1163 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1164 char ch2 = dataarray[i++]; 1165 if (XMLChar.isLowSurrogate(ch2) 1166 && XMLChar.isSupplemental( 1167 XMLChar.supplemental(ch, ch2))) { 1168 continue; 1169 } 1170 } 1171 // Reference to invalid character which is returned 1172 refInvalidChar = new Character(ch); 1173 return false; 1174 } 1175 } 1176 } // version of the document is XML 1.0 1177 else { 1178 // we need to check all characters as per 
production rules of XML 1.0 1179 int i = 0; 1180 while (i < datalength) { 1181 if (XMLChar.isInvalid(dataarray[i++])) { 1182 // check if this is a supplemental character 1183 char ch = dataarray[i - 1]; 1184 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1185 char ch2 = dataarray[i++]; 1186 if (XMLChar.isLowSurrogate(ch2) 1187 && XMLChar.isSupplemental( 1188 XMLChar.supplemental(ch, ch2))) { 1189 continue; 1190 } 1191 } 1192 // Reference to invalid character which is returned 1193 refInvalidChar = new Character(ch); 1194 return false; 1195 } 1196 } 1197 } // end-else fDocument.isXMLVersion() 1198 1199 return true; 1200 } // isXMLCharWF 1201 1202 /** 1203 * Checks if a XML character is well-formed. If there is a problem with 1204 * the character a non-null Character is returned else null is returned. 1205 * 1206 * @param characters A String of characters to be checked for Well-Formedness 1207 * @return Character A reference to the character to be returned that was determined invalid. 
1208 */ 1209 protected Character isWFXMLChar(String chardata) { 1210 Character refInvalidChar; 1211 if (chardata == null || (chardata.length() == 0)) { 1212 return null; 1213 } 1214 1215 char[] dataarray = chardata.toCharArray(); 1216 int datalength = dataarray.length; 1217 1218 // version of the document is XML 1.1 1219 if (fIsXMLVersion11) { 1220 //we need to check all characters as per production rules of XML11 1221 int i = 0; 1222 while (i < datalength) { 1223 if (XML11Char.isXML11Invalid(dataarray[i++])) { 1224 // check if this is a supplemental character 1225 char ch = dataarray[i - 1]; 1226 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1227 char ch2 = dataarray[i++]; 1228 if (XMLChar.isLowSurrogate(ch2) 1229 && XMLChar.isSupplemental( 1230 XMLChar.supplemental(ch, ch2))) { 1231 continue; 1232 } 1233 } 1234 // Reference to invalid character which is returned 1235 refInvalidChar = new Character(ch); 1236 return refInvalidChar; 1237 } 1238 } 1239 } // version of the document is XML 1.0 1240 else { 1241 // we need to check all characters as per production rules of XML 1.0 1242 int i = 0; 1243 while (i < datalength) { 1244 if (XMLChar.isInvalid(dataarray[i++])) { 1245 // check if this is a supplemental character 1246 char ch = dataarray[i - 1]; 1247 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1248 char ch2 = dataarray[i++]; 1249 if (XMLChar.isLowSurrogate(ch2) 1250 && XMLChar.isSupplemental( 1251 XMLChar.supplemental(ch, ch2))) { 1252 continue; 1253 } 1254 } 1255 // Reference to invalid character which is returned 1256 refInvalidChar = new Character(ch); 1257 return refInvalidChar; 1258 } 1259 } 1260 } // end-else fDocument.isXMLVersion() 1261 1262 return null; 1263 } // isXMLCharWF 1264 1265 /** 1266 * Checks if a comment node is well-formed 1267 * 1268 * @param data The contents of the comment node 1269 * @return a boolean indiacating if the comment is well-formed or not. 
1270 */ 1271 protected void isCommentWellFormed(String data) { 1272 if (data == null || (data.length() == 0)) { 1273 return; 1274 } 1275 1276 char[] dataarray = data.toCharArray(); 1277 int datalength = dataarray.length; 1278 1279 // version of the document is XML 1.1 1280 if (fIsXMLVersion11) { 1281 // we need to check all chracters as per production rules of XML11 1282 int i = 0; 1283 while (i < datalength) { 1284 char c = dataarray[i++]; 1285 if (XML11Char.isXML11Invalid(c)) { 1286 // check if this is a supplemental character 1287 if (XMLChar.isHighSurrogate(c) && i < datalength) { 1288 char c2 = dataarray[i++]; 1289 if (XMLChar.isLowSurrogate(c2) 1290 && XMLChar.isSupplemental( 1291 XMLChar.supplemental(c, c2))) { 1292 continue; 1293 } 1294 } 1295 String msg = 1296 Utils.messages.createMessage( 1297 MsgKey.ER_WF_INVALID_CHARACTER_IN_COMMENT, 1298 new Object[] { new Character(c)}); 1299 1300 if (fErrorHandler != null) { 1301 fErrorHandler.handleError( 1302 new DOMErrorImpl( 1303 DOMError.SEVERITY_FATAL_ERROR, 1304 msg, 1305 MsgKey.ER_WF_INVALID_CHARACTER, 1306 null, 1307 null, 1308 null)); 1309 } 1310 } else if (c == '-' && i < datalength && dataarray[i] == '-') { 1311 String msg = 1312 Utils.messages.createMessage( 1313 MsgKey.ER_WF_DASH_IN_COMMENT, 1314 null); 1315 1316 if (fErrorHandler != null) { 1317 fErrorHandler.handleError( 1318 new DOMErrorImpl( 1319 DOMError.SEVERITY_FATAL_ERROR, 1320 msg, 1321 MsgKey.ER_WF_INVALID_CHARACTER, 1322 null, 1323 null, 1324 null)); 1325 } 1326 } 1327 } 1328 } // version of the document is XML 1.0 1329 else { 1330 // we need to check all chracters as per production rules of XML 1.0 1331 int i = 0; 1332 while (i < datalength) { 1333 char c = dataarray[i++]; 1334 if (XMLChar.isInvalid(c)) { 1335 // check if this is a supplemental character 1336 if (XMLChar.isHighSurrogate(c) && i < datalength) { 1337 char c2 = dataarray[i++]; 1338 if (XMLChar.isLowSurrogate(c2) 1339 && XMLChar.isSupplemental( 1340 XMLChar.supplemental(c, c2))) 
{ 1341 continue; 1342 } 1343 } 1344 String msg = 1345 Utils.messages.createMessage( 1346 MsgKey.ER_WF_INVALID_CHARACTER_IN_COMMENT, 1347 new Object[] { new Character(c)}); 1348 1349 if (fErrorHandler != null) { 1350 fErrorHandler.handleError( 1351 new DOMErrorImpl( 1352 DOMError.SEVERITY_FATAL_ERROR, 1353 msg, 1354 MsgKey.ER_WF_INVALID_CHARACTER, 1355 null, 1356 null, 1357 null)); 1358 } 1359 } else if (c == '-' && i < datalength && dataarray[i] == '-') { 1360 String msg = 1361 Utils.messages.createMessage( 1362 MsgKey.ER_WF_DASH_IN_COMMENT, 1363 null); 1364 1365 if (fErrorHandler != null) { 1366 fErrorHandler.handleError( 1367 new DOMErrorImpl( 1368 DOMError.SEVERITY_FATAL_ERROR, 1369 msg, 1370 MsgKey.ER_WF_INVALID_CHARACTER, 1371 null, 1372 null, 1373 null)); 1374 } 1375 } 1376 } 1377 } 1378 return; 1379 } 1380 1381 /** 1382 * Checks if an element node is well-formed, by checking its Name for well-formedness. 1383 * 1384 * @param data The contents of the comment node 1385 * @return a boolean indiacating if the comment is well-formed or not. 1386 */ 1387 protected void isElementWellFormed(Node node) { 1388 boolean isNameWF = false; 1389 if ((fFeatures & NAMESPACES) != 0) { 1390 isNameWF = 1391 isValidQName( 1392 node.getPrefix(), 1393 node.getLocalName(), 1394 fIsXMLVersion11); 1395 } else { 1396 isNameWF = isXMLName(node.getNodeName(), fIsXMLVersion11); 1397 } 1398 1399 if (!isNameWF) { 1400 String msg = 1401 Utils.messages.createMessage( 1402 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1403 new Object[] { "Element", node.getNodeName()}); 1404 1405 if (fErrorHandler != null) { 1406 fErrorHandler.handleError( 1407 new DOMErrorImpl( 1408 DOMError.SEVERITY_FATAL_ERROR, 1409 msg, 1410 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1411 null, 1412 null, 1413 null)); 1414 } 1415 } 1416 } 1417 1418 /** 1419 * Checks if an attr node is well-formed, by checking it's Name and value 1420 * for well-formedness. 
1421 * 1422 * @param data The contents of the comment node 1423 * @return a boolean indiacating if the comment is well-formed or not. 1424 */ 1425 protected void isAttributeWellFormed(Node node) { 1426 boolean isNameWF = false; 1427 if ((fFeatures & NAMESPACES) != 0) { 1428 isNameWF = 1429 isValidQName( 1430 node.getPrefix(), 1431 node.getLocalName(), 1432 fIsXMLVersion11); 1433 } else { 1434 isNameWF = isXMLName(node.getNodeName(), fIsXMLVersion11); 1435 } 1436 1437 if (!isNameWF) { 1438 String msg = 1439 Utils.messages.createMessage( 1440 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1441 new Object[] { "Attr", node.getNodeName()}); 1442 1443 if (fErrorHandler != null) { 1444 fErrorHandler.handleError( 1445 new DOMErrorImpl( 1446 DOMError.SEVERITY_FATAL_ERROR, 1447 msg, 1448 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1449 null, 1450 null, 1451 null)); 1452 } 1453 } 1454 1455 // Check the Attr's node value 1456 // WFC: No < in Attribute Values 1457 String value = node.getNodeValue(); 1458 if (value.indexOf('<') >= 0) { 1459 String msg = 1460 Utils.messages.createMessage( 1461 MsgKey.ER_WF_LT_IN_ATTVAL, 1462 new Object[] { 1463 ((Attr) node).getOwnerElement().getNodeName(), 1464 node.getNodeName()}); 1465 1466 if (fErrorHandler != null) { 1467 fErrorHandler.handleError( 1468 new DOMErrorImpl( 1469 DOMError.SEVERITY_FATAL_ERROR, 1470 msg, 1471 MsgKey.ER_WF_LT_IN_ATTVAL, 1472 null, 1473 null, 1474 null)); 1475 } 1476 } 1477 1478 // we need to loop through the children of attr nodes and check their values for 1479 // well-formedness 1480 NodeList children = node.getChildNodes(); 1481 for (int i = 0; i < children.getLength(); i++) { 1482 Node child = children.item(i); 1483 // An attribute node with no text or entity ref child for example 1484 // doc.createAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:ns"); 1485 // followes by 1486 // element.setAttributeNodeNS(attribute); 1487 // can potentially lead to this situation. 
If the attribute 1488 // was a prefix Namespace attribute declaration then then DOM Core 1489 // should have some exception defined for this. 1490 if (child == null) { 1491 // we should probably report an error 1492 continue; 1493 } 1494 switch (child.getNodeType()) { 1495 case Node.TEXT_NODE : 1496 isTextWellFormed((Text) child); 1497 break; 1498 case Node.ENTITY_REFERENCE_NODE : 1499 isEntityReferneceWellFormed((EntityReference) child); 1500 break; 1501 default : 1502 } 1503 } 1504 1505 // TODO: 1506 // WFC: Check if the attribute prefix is bound to 1507 // http://www.w3.org/2000/xmlns/ 1508 1509 // WFC: Unique Att Spec 1510 // Perhaps pass a seen boolean value to this method. serializeAttList will determine 1511 // if the attr was seen before. 1512 } 1513 1514 /** 1515 * Checks if a PI node is well-formed, by checking it's Name and data 1516 * for well-formedness. 1517 * 1518 * @param data The contents of the comment node 1519 */ 1520 protected void isPIWellFormed(ProcessingInstruction node) { 1521 // Is the PI Target a valid XML name 1522 if (!isXMLName(node.getNodeName(), fIsXMLVersion11)) { 1523 String msg = 1524 Utils.messages.createMessage( 1525 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1526 new Object[] { "ProcessingInstruction", node.getTarget()}); 1527 1528 if (fErrorHandler != null) { 1529 fErrorHandler.handleError( 1530 new DOMErrorImpl( 1531 DOMError.SEVERITY_FATAL_ERROR, 1532 msg, 1533 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1534 null, 1535 null, 1536 null)); 1537 } 1538 } 1539 1540 // Does the PI Data carry valid XML characters 1541 1542 // REVISIT: Should we check if the PI DATA contains a ?> ??? 
1543 Character invalidChar = isWFXMLChar(node.getData()); 1544 if (invalidChar != null) { 1545 String msg = 1546 Utils.messages.createMessage( 1547 MsgKey.ER_WF_INVALID_CHARACTER_IN_PI, 1548 new Object[] { Integer.toHexString(Character.getNumericValue(invalidChar.charValue())) }); 1549 1550 if (fErrorHandler != null) { 1551 fErrorHandler.handleError( 1552 new DOMErrorImpl( 1553 DOMError.SEVERITY_FATAL_ERROR, 1554 msg, 1555 MsgKey.ER_WF_INVALID_CHARACTER, 1556 null, 1557 null, 1558 null)); 1559 } 1560 } 1561 } 1562 1563 /** 1564 * Checks if an CDATASection node is well-formed, by checking it's data 1565 * for well-formedness. Note that the presence of a CDATA termination mark 1566 * in the contents of a CDATASection is handled by the parameter 1567 * spli-cdata-sections 1568 * 1569 * @param data The contents of the comment node 1570 */ 1571 protected void isCDATASectionWellFormed(CDATASection node) { 1572 // Does the data valid XML character data 1573 Character invalidChar = isWFXMLChar(node.getData()); 1574 //if (!isWFXMLChar(node.getData(), invalidChar)) { 1575 if (invalidChar != null) { 1576 String msg = 1577 Utils.messages.createMessage( 1578 MsgKey.ER_WF_INVALID_CHARACTER_IN_CDATA, 1579 new Object[] { Integer.toHexString(Character.getNumericValue(invalidChar.charValue())) }); 1580 1581 if (fErrorHandler != null) { 1582 fErrorHandler.handleError( 1583 new DOMErrorImpl( 1584 DOMError.SEVERITY_FATAL_ERROR, 1585 msg, 1586 MsgKey.ER_WF_INVALID_CHARACTER, 1587 null, 1588 null, 1589 null)); 1590 } 1591 } 1592 } 1593 1594 /** 1595 * Checks if an Text node is well-formed, by checking if it contains invalid 1596 * XML characters. 
1597 * 1598 * @param data The contents of the comment node 1599 */ 1600 protected void isTextWellFormed(Text node) { 1601 // Does the data valid XML character data 1602 Character invalidChar = isWFXMLChar(node.getData()); 1603 if (invalidChar != null) { 1604 String msg = 1605 Utils.messages.createMessage( 1606 MsgKey.ER_WF_INVALID_CHARACTER_IN_TEXT, 1607 new Object[] { Integer.toHexString(Character.getNumericValue(invalidChar.charValue())) }); 1608 1609 if (fErrorHandler != null) { 1610 fErrorHandler.handleError( 1611 new DOMErrorImpl( 1612 DOMError.SEVERITY_FATAL_ERROR, 1613 msg, 1614 MsgKey.ER_WF_INVALID_CHARACTER, 1615 null, 1616 null, 1617 null)); 1618 } 1619 } 1620 } 1621 1622 /** 1623 * Checks if an EntityRefernece node is well-formed, by checking it's node name. Then depending 1624 * on whether it is referenced in Element content or in an Attr Node, checks if the EntityReference 1625 * references an unparsed entity or a external entity and if so throws raises the 1626 * appropriate well-formedness error. 1627 * 1628 * @param data The contents of the comment node 1629 * @parent The parent of the EntityReference Node 1630 */ 1631 protected void isEntityReferneceWellFormed(EntityReference node) { 1632 // Is the EntityReference name a valid XML name 1633 if (!isXMLName(node.getNodeName(), fIsXMLVersion11)) { 1634 String msg = 1635 Utils.messages.createMessage( 1636 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1637 new Object[] { "EntityReference", node.getNodeName()}); 1638 1639 if (fErrorHandler != null) { 1640 fErrorHandler.handleError( 1641 new DOMErrorImpl( 1642 DOMError.SEVERITY_FATAL_ERROR, 1643 msg, 1644 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1645 null, 1646 null, 1647 null)); 1648 } 1649 } 1650 1651 // determine the parent node 1652 Node parent = node.getParentNode(); 1653 1654 // Traverse the declared entities and check if the nodeName and namespaceURI 1655 // of the EntityReference matches an Entity. 
If so, check the if the notationName 1656 // is not null, if so, report an error. 1657 DocumentType docType = node.getOwnerDocument().getDoctype(); 1658 if (docType != null) { 1659 NamedNodeMap entities = docType.getEntities(); 1660 for (int i = 0; i < entities.getLength(); i++) { 1661 Entity ent = (Entity) entities.item(i); 1662 1663 String nodeName = 1664 node.getNodeName() == null ? "" : node.getNodeName(); 1665 String nodeNamespaceURI = 1666 node.getNamespaceURI() == null 1667 ? "" 1668 : node.getNamespaceURI(); 1669 String entName = 1670 ent.getNodeName() == null ? "" : ent.getNodeName(); 1671 String entNamespaceURI = 1672 ent.getNamespaceURI() == null ? "" : ent.getNamespaceURI(); 1673 // If referenced in Element content 1674 // WFC: Parsed Entity 1675 if (parent.getNodeType() == Node.ELEMENT_NODE) { 1676 if (entNamespaceURI.equals(nodeNamespaceURI) 1677 && entName.equals(nodeName)) { 1678 1679 if (ent.getNotationName() != null) { 1680 String msg = 1681 Utils.messages.createMessage( 1682 MsgKey.ER_WF_REF_TO_UNPARSED_ENT, 1683 new Object[] { node.getNodeName()}); 1684 1685 if (fErrorHandler != null) { 1686 fErrorHandler.handleError( 1687 new DOMErrorImpl( 1688 DOMError.SEVERITY_FATAL_ERROR, 1689 msg, 1690 MsgKey.ER_WF_REF_TO_UNPARSED_ENT, 1691 null, 1692 null, 1693 null)); 1694 } 1695 } 1696 } 1697 } // end if WFC: Parsed Entity 1698 1699 // If referenced in an Attr value 1700 // WFC: No External Entity References 1701 if (parent.getNodeType() == Node.ATTRIBUTE_NODE) { 1702 if (entNamespaceURI.equals(nodeNamespaceURI) 1703 && entName.equals(nodeName)) { 1704 1705 if (ent.getPublicId() != null 1706 || ent.getSystemId() != null 1707 || ent.getNotationName() != null) { 1708 String msg = 1709 Utils.messages.createMessage( 1710 MsgKey.ER_WF_REF_TO_EXTERNAL_ENT, 1711 new Object[] { node.getNodeName()}); 1712 1713 if (fErrorHandler != null) { 1714 fErrorHandler.handleError( 1715 new DOMErrorImpl( 1716 DOMError.SEVERITY_FATAL_ERROR, 1717 msg, 1718 
MsgKey.ER_WF_REF_TO_EXTERNAL_ENT, 1719 null, 1720 null, 1721 null)); 1722 } 1723 } 1724 } 1725 } //end if WFC: No External Entity References 1726 } 1727 } 1728 } // isEntityReferneceWellFormed 1729 1730 /** 1731 * If the configuration parameter "namespaces" is set to true, this methods 1732 * checks if an entity whose replacement text contains unbound namespace 1733 * prefixes is referenced in a location where there are no bindings for 1734 * the namespace prefixes and if so raises a LSException with the error-type 1735 * "unbound-prefix-in-entity-reference" 1736 * 1737 * @param Node, The EntityReference nodes whose children are to be checked 1738 */ 1739 protected void checkUnboundPrefixInEntRef(Node node) { 1740 Node child, next; 1741 for (child = node.getFirstChild(); child != null; child = next) { 1742 next = child.getNextSibling(); 1743 1744 if (child.getNodeType() == Node.ELEMENT_NODE) { 1745 1746 //If a NamespaceURI is not declared for the current 1747 //node's prefix, raise a fatal error. 
1748 String prefix = child.getPrefix(); 1749 if (prefix != null 1750 && fNSBinder.getURI(prefix) == null) { 1751 String msg = 1752 Utils.messages.createMessage( 1753 MsgKey.ER_ELEM_UNBOUND_PREFIX_IN_ENTREF, 1754 new Object[] { 1755 node.getNodeName(), 1756 child.getNodeName(), 1757 prefix }); 1758 1759 if (fErrorHandler != null) { 1760 fErrorHandler.handleError( 1761 new DOMErrorImpl( 1762 DOMError.SEVERITY_FATAL_ERROR, 1763 msg, 1764 MsgKey.ER_ELEM_UNBOUND_PREFIX_IN_ENTREF, 1765 null, 1766 null, 1767 null)); 1768 } 1769 } 1770 1771 NamedNodeMap attrs = child.getAttributes(); 1772 1773 for (int i = 0; i < attrs.getLength(); i++) { 1774 String attrPrefix = attrs.item(i).getPrefix(); 1775 if (attrPrefix != null 1776 && fNSBinder.getURI(attrPrefix) == null) { 1777 String msg = 1778 Utils.messages.createMessage( 1779 MsgKey.ER_ATTR_UNBOUND_PREFIX_IN_ENTREF, 1780 new Object[] { 1781 node.getNodeName(), 1782 child.getNodeName(), 1783 attrs.item(i)}); 1784 1785 if (fErrorHandler != null) { 1786 fErrorHandler.handleError( 1787 new DOMErrorImpl( 1788 DOMError.SEVERITY_FATAL_ERROR, 1789 msg, 1790 MsgKey.ER_ATTR_UNBOUND_PREFIX_IN_ENTREF, 1791 null, 1792 null, 1793 null)); 1794 } 1795 } 1796 } 1797 } 1798 1799 if (child.hasChildNodes()) { 1800 checkUnboundPrefixInEntRef(child); 1801 } 1802 } 1803 } 1804 1805 // *********************************************************************** 1806 // Namespace normalization 1807 // *********************************************************************** 1808 /** 1809 * Records local namespace declarations, to be used for normalization later 1810 * 1811 * @param Node, The element node, whose namespace declarations are to be recorded 1812 */ 1813 protected void recordLocalNSDecl(Node node) { 1814 NamedNodeMap atts = ((Element) node).getAttributes(); 1815 int length = atts.getLength(); 1816 1817 for (int i = 0; i < length; i++) { 1818 Node attr = atts.item(i); 1819 1820 String localName = attr.getLocalName(); 1821 String attrPrefix = 
attr.getPrefix(); 1822 String attrValue = attr.getNodeValue(); 1823 String attrNS = attr.getNamespaceURI(); 1824 1825 localName = 1826 localName == null 1827 || XMLNS_PREFIX.equals(localName) ? "" : localName; 1828 attrPrefix = attrPrefix == null ? "" : attrPrefix; 1829 attrValue = attrValue == null ? "" : attrValue; 1830 attrNS = attrNS == null ? "" : attrNS; 1831 1832 // check if attribute is a namespace decl 1833 if (XMLNS_URI.equals(attrNS)) { 1834 1835 // No prefix may be bound to http://www.w3.org/2000/xmlns/. 1836 if (XMLNS_URI.equals(attrValue)) { 1837 String msg = 1838 Utils.messages.createMessage( 1839 MsgKey.ER_NS_PREFIX_CANNOT_BE_BOUND, 1840 new Object[] { attrPrefix, XMLNS_URI }); 1841 1842 if (fErrorHandler != null) { 1843 fErrorHandler.handleError( 1844 new DOMErrorImpl( 1845 DOMError.SEVERITY_ERROR, 1846 msg, 1847 MsgKey.ER_NS_PREFIX_CANNOT_BE_BOUND, 1848 null, 1849 null, 1850 null)); 1851 } 1852 } else { 1853 // store the namespace-declaration 1854 if (XMLNS_PREFIX.equals(attrPrefix) ) { 1855 // record valid decl 1856 if (attrValue.length() != 0) { 1857 fNSBinder.declarePrefix(localName, attrValue); 1858 } else { 1859 // Error; xmlns:prefix="" 1860 } 1861 } else { // xmlns 1862 // empty prefix is always bound ("" or some string) 1863 fNSBinder.declarePrefix("", attrValue); 1864 } 1865 } 1866 1867 } 1868 } 1869 } 1870 1871 /** 1872 * Fixes an element's namespace 1873 * 1874 * @param Node, The element node, whose namespace is to be fixed 1875 */ 1876 protected void fixupElementNS(Node node) throws SAXException { 1877 String namespaceURI = ((Element) node).getNamespaceURI(); 1878 String prefix = ((Element) node).getPrefix(); 1879 String localName = ((Element) node).getLocalName(); 1880 1881 if (namespaceURI != null) { 1882 //if ( Element's prefix/namespace pair (or default namespace, 1883 // if no prefix) are within the scope of a binding ) 1884 prefix = prefix == null ? 
"" : prefix; 1885 String inScopeNamespaceURI = fNSBinder.getURI(prefix); 1886 1887 if ((inScopeNamespaceURI != null 1888 && inScopeNamespaceURI.equals(namespaceURI))) { 1889 // do nothing, declaration in scope is inherited 1890 1891 } else { 1892 // Create a local namespace declaration attr for this namespace, 1893 // with Element's current prefix (or a default namespace, if 1894 // no prefix). If there's a conflicting local declaration 1895 // already present, change its value to use this namespace. 1896 1897 // Add the xmlns declaration attribute 1898 //fNSBinder.pushNamespace(prefix, namespaceURI, fElementDepth); 1899 if ((fFeatures & NAMESPACEDECLS) != 0) { 1900 if ("".equals(prefix) || "".equals(namespaceURI)) { 1901 ((Element)node).setAttributeNS(XMLNS_URI, XMLNS_PREFIX, namespaceURI); 1902 } else { 1903 ((Element)node).setAttributeNS(XMLNS_URI, XMLNS_PREFIX + ":" + prefix, namespaceURI); 1904 } 1905 } 1906 fLocalNSBinder.declarePrefix(prefix, namespaceURI); 1907 fNSBinder.declarePrefix(prefix, namespaceURI); 1908 1909 } 1910 } else { 1911 // Element has no namespace 1912 // DOM Level 1 1913 if (localName == null || "".equals(localName)) { 1914 // DOM Level 1 node! 
1915 String msg = 1916 Utils.messages.createMessage( 1917 MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, 1918 new Object[] { node.getNodeName()}); 1919 1920 if (fErrorHandler != null) { 1921 fErrorHandler.handleError( 1922 new DOMErrorImpl( 1923 DOMError.SEVERITY_ERROR, 1924 msg, 1925 MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, 1926 null, 1927 null, 1928 null)); 1929 } 1930 } else { 1931 namespaceURI = fNSBinder.getURI(""); 1932 if (namespaceURI !=null && namespaceURI.length() > 0) { 1933 ((Element)node).setAttributeNS(XMLNS_URI, XMLNS_PREFIX, ""); 1934 fLocalNSBinder.declarePrefix("", ""); 1935 fNSBinder.declarePrefix("", ""); 1936 } 1937 } 1938 } 1939 } 1940 /** 1941 * This table is a quick lookup of a property key (String) to the integer that 1942 * is the bit to flip in the fFeatures field, so the integers should have 1943 * values 1,2,4,8,16... 1944 * 1945 */ 1946 private static final Map<String, Integer> fFeatureMap; 1947 static { 1948 1949 // Initialize the mappings of property keys to bit values (Integer objects) 1950 // or mappings to a String object "", which indicates we are interested 1951 // in the property, but it does not have a simple bit value to flip 1952 1953 Map<String, Integer> featureMap = new HashMap<>(); 1954 // cdata-sections 1955 featureMap.put( 1956 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_CDATA_SECTIONS, 1957 CDATA); 1958 1959 // comments 1960 featureMap.put( 1961 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_COMMENTS, 1962 COMMENTS); 1963 1964 // element-content-whitespace 1965 featureMap.put( 1966 DOMConstants.S_DOM3_PROPERTIES_NS 1967 + DOMConstants.DOM_ELEMENT_CONTENT_WHITESPACE, 1968 ELEM_CONTENT_WHITESPACE); 1969 1970 // entities 1971 featureMap.put( 1972 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_ENTITIES, 1973 ENTITIES); 1974 1975 // namespaces 1976 featureMap.put( 1977 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_NAMESPACES, 1978 NAMESPACES); 1979 1980 // namespace-declarations 1981 featureMap.put( 1982 
DOMConstants.S_DOM3_PROPERTIES_NS 1983 + DOMConstants.DOM_NAMESPACE_DECLARATIONS, 1984 NAMESPACEDECLS); 1985 1986 // split-cdata-sections 1987 featureMap.put( 1988 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_SPLIT_CDATA, 1989 SPLITCDATA); 1990 1991 // discard-default-content 1992 featureMap.put( 1993 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_WELLFORMED, 1994 WELLFORMED); 1995 1996 // discard-default-content 1997 featureMap.put( 1998 DOMConstants.S_DOM3_PROPERTIES_NS 1999 + DOMConstants.DOM_DISCARD_DEFAULT_CONTENT, 2000 DISCARDDEFAULT); 2001 2002 fFeatureMap = Collections.unmodifiableMap(featureMap); 2003 } 2004 2005 /** 2006 * Initializes fFeatures based on the DOMConfiguration Parameters set. 2007 * 2008 * @param properties DOMConfiguraiton properties that were set and which are 2009 * to be used while serializing the DOM. 2010 */ 2011 protected void initProperties(Properties properties) { 2012 2013 for (Enumeration keys = properties.keys(); keys.hasMoreElements();) { 2014 2015 final String key = (String) keys.nextElement(); 2016 2017 // caonical-form 2018 // Other features will be enabled or disabled when this is set to true or false. 2019 2020 // error-handler; set via the constructor 2021 2022 // infoset 2023 // Other features will be enabled or disabled when this is set to true 2024 2025 // A quick lookup for the given set of properties (cdata-sections ...) 
2026 final Integer bitFlag = fFeatureMap.get(key); 2027 if (bitFlag != null) { 2028 // Dealing with a property that has a simple bit value that 2029 // we need to set 2030 2031 // cdata-sections 2032 // comments 2033 // element-content-whitespace 2034 // entities 2035 // namespaces 2036 // namespace-declarations 2037 // split-cdata-sections 2038 // well-formed 2039 // discard-default-content 2040 if ((properties.getProperty(key).endsWith("yes"))) { 2041 fFeatures = fFeatures | bitFlag; 2042 } else { 2043 fFeatures = fFeatures & ~bitFlag; 2044 } 2045 } else { 2046 /** 2047 * Other properties that have a bit more complex value 2048 * than the features in the above map. 2049 */ 2050 if ((DOMConstants.S_DOM3_PROPERTIES_NS 2051 + DOMConstants.DOM_FORMAT_PRETTY_PRINT) 2052 .equals(key)) { 2053 // format-pretty-print; set internally on the serializers via xsl:output properties in LSSerializer 2054 if ((properties.getProperty(key).endsWith("yes"))) { 2055 fSerializer.setIndent(true); 2056 fSerializer.setIndentAmount(4); 2057 } else { 2058 fSerializer.setIndent(false); 2059 } 2060 } else if ((DOMConstants.S_XSL_OUTPUT_OMIT_XML_DECL).equals(key)) { 2061 // omit-xml-declaration; set internally on the serializers via xsl:output properties in LSSerializer 2062 if ((properties.getProperty(key).endsWith("yes"))) { 2063 fSerializer.setOmitXMLDeclaration(true); 2064 } else { 2065 fSerializer.setOmitXMLDeclaration(false); 2066 } 2067 } else if ((DOMConstants.S_XERCES_PROPERTIES_NS 2068 + DOMConstants.S_XML_VERSION).equals(key)) { 2069 // Retreive the value of the XML Version attribute via the xml-version 2070 String version = properties.getProperty(key); 2071 if ("1.1".equals(version)) { 2072 fIsXMLVersion11 = true; 2073 fSerializer.setVersion(version); 2074 } else { 2075 fSerializer.setVersion("1.0"); 2076 } 2077 } else if ((DOMConstants.S_XSL_OUTPUT_ENCODING).equals(key)) { 2078 // Retreive the value of the XML Encoding attribute 2079 String encoding = properties.getProperty(key); 
2080 if (encoding != null) { 2081 fSerializer.setEncoding(encoding); 2082 } 2083 } else if ((OutputPropertiesFactory.S_KEY_ENTITIES).equals(key)) { 2084 // Retreive the value of the XML Encoding attribute 2085 String entities = properties.getProperty(key); 2086 if (DOMConstants.S_XSL_VALUE_ENTITIES.equals(entities)) { 2087 fSerializer.setDTDEntityExpansion(false); 2088 } 2089 } 2090 } 2091 } 2092 // Set the newLine character to use 2093 if (fNewLine != null) { 2094 fSerializer.setOutputProperty(OutputPropertiesFactory.S_KEY_LINE_SEPARATOR, fNewLine); 2095 } 2096 } 2097 2098 } //TreeWalker