1 /* 2 * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. 3 * @LastModified: Nov 2017 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 // Sep 14, 2000: 23 // Fixed comments to preserve whitespaces and add a line break 24 // when indenting. Reported by Gervase Markham <gerv@gerv.net> 25 // Sep 14, 2000: 26 // Fixed serializer to report IO exception directly, instead at 27 // the end of document processing. 28 // Reported by Patrick Higgins <phiggins@transzap.com> 29 // Sep 13, 2000: 30 // CR in character data will print as D; 31 // Aug 25, 2000: 32 // Fixed processing instruction printing inside element content 33 // to not escape content. Reported by Mikael Staldal 34 // <d96-mst@d.kth.se> 35 // Aug 25, 2000: 36 // Added ability to omit comments. 37 // Contributed by Anupam Bagchi <abagchi@jtcsv.com> 38 // Aug 26, 2000: 39 // Fixed bug in newline handling when preserving spaces. 40 // Contributed by Mike Dusseault <mdusseault@home.com> 41 // Aug 29, 2000: 42 // Fixed state.unescaped not being set to false when 43 // entering element state. 
44 // Reported by Lowell Vaughn <lvaughn@agillion.com> 45 46 47 package com.sun.org.apache.xml.internal.serialize; 48 49 50 import com.sun.org.apache.xerces.internal.dom.DOMErrorImpl; 51 import com.sun.org.apache.xerces.internal.dom.DOMLocatorImpl; 52 import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter; 53 import com.sun.org.apache.xerces.internal.util.XMLChar; 54 import java.io.IOException; 55 import java.io.OutputStream; 56 import java.io.Writer; 57 import java.util.ArrayList; 58 import java.util.HashMap; 59 import java.util.List; 60 import java.util.Map; 61 import org.w3c.dom.DOMError; 62 import org.w3c.dom.DOMErrorHandler; 63 import org.w3c.dom.Document; 64 import org.w3c.dom.DocumentFragment; 65 import org.w3c.dom.DocumentType; 66 import org.w3c.dom.Element; 67 import org.w3c.dom.Node; 68 import org.w3c.dom.ls.LSException; 69 import org.w3c.dom.ls.LSSerializerFilter; 70 import org.w3c.dom.traversal.NodeFilter; 71 import org.xml.sax.ContentHandler; 72 import org.xml.sax.DTDHandler; 73 import org.xml.sax.DocumentHandler; 74 import org.xml.sax.Locator; 75 import org.xml.sax.SAXException; 76 import org.xml.sax.ext.DeclHandler; 77 import org.xml.sax.ext.LexicalHandler; 78 79 /** 80 * Base class for a serializer supporting both DOM and SAX pretty 81 * serializing of XML/HTML/XHTML documents. Derives classes perform 82 * the method-specific serializing, this class provides the common 83 * serializing mechanisms. 84 * <p> 85 * The serializer must be initialized with the proper writer and 86 * output format before it can be used by calling {@link #setOutputCharStream} 87 * or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat} 88 * for the output format. 89 * <p> 90 * The serializer can be reused any number of times, but cannot 91 * be used concurrently by two threads. 92 * <p> 93 * If an output stream is used, the encoding is taken from the 94 * output format (defaults to <tt>UTF-8</tt>). 
If a writer is
 * used, make sure the writer uses the same encoding (if applicable)
 * as specified in the output format.
 * <p>
 * The serializer supports both DOM and SAX. DOM serializing is done
 * by calling {@link #serialize(Document)} and SAX serializing is done by firing
 * SAX events and using the serializer as a document handler.
 * This also applies to derived classes.
 * <p>
 * If an I/O exception occurs while serializing, the serializer
 * will not throw an exception directly, but only throw it
 * at the end of serializing (either DOM or SAX's {@link
 * org.xml.sax.DocumentHandler#endDocument}).
 * <p>
 * For elements that are not specified as whitespace preserving,
 * the serializer will potentially break long text lines at space
 * boundaries, indent lines, and serialize elements on separate
 * lines. Line terminators will be regarded as spaces, and
 * spaces at beginning of line will be stripped.
 * <p>
 * When indenting, the serializer is capable of detecting seemingly
 * element content, and serializing these elements indented on separate
 * lines. An element is serialized indented when it is the first or
 * last child of an element, or immediately following or preceding
 * another element.
 *
 *
 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
 * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
 * @author Elena Litani, IBM
 * @author Sunitha Reddy, Sun Microsystems
 * @see Serializer
 * @see org.w3c.dom.ls.LSSerializer
 *
 * @deprecated As of JDK 9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation
 * is replaced by that of Xalan. Main class
 * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced
 * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}.
*/
@Deprecated
public abstract class BaseMarkupSerializer
    implements ContentHandler, DocumentHandler, LexicalHandler,
               DTDHandler, DeclHandler, DOMSerializer, Serializer
{

    // DOM L3 implementation

    // Feature bit mask, initialized with every bit set. serializeNode() tests
    // it against DOMSerializerImpl.CDATA and DOMSerializerImpl.ENTITIES.
    protected short features = 0xFFFFFFFF;
    // NOTE(review): presumably the handler DOM errors are reported to; it is
    // not assigned in the part of the class visible here -- confirm.
    protected DOMErrorHandler fDOMErrorHandler;
    protected final DOMErrorImpl fDOMError = new DOMErrorImpl();
    // Optional filter consulted by serializeNode(); null means no filtering.
    protected LSSerializerFilter fDOMFilter;

    // Encoding information taken from the output format in prepare().
    protected EncodingInfo _encodingInfo;


    /**
     * Holds array of all element states that have been entered.
     * The array is automatically resized. When leaving an element,
     * its state is not removed but reused when later returning
     * to the same nesting level.
     */
    private ElementState[] _elementStates;


    /**
     * The index of the next state to place in the array,
     * or one plus the index of the current state. When zero,
     * we are in no state.
     */
    private int _elementStateCount;


    /**
     * List holding comments and PIs that come before the root
     * element (even after it), see {@link #serializePreRoot}.
     */
    private List<String> _preRoot;


    /**
     * If the document has been started (header serialized), this
     * flag is set to true so it's not started twice.
     */
    protected boolean _started;


    /**
     * True if the serializer has been prepared. This flag is set
     * to false when the serializer is reset prior to using it,
     * and to true after it has been prepared for usage.
     */
    private boolean _prepared;


    /**
     * Association between namespace URIs (keys) and prefixes (values).
     * Accumulated here prior to starting an element and placing this
     * list in the element state.
     */
    protected Map<String, String> _prefixes;


    /**
     * The public identifier of the document type, if known.
     */
    protected String _docTypePublicId;


    /**
     * The system identifier of the document type, if known.
     */
    protected String _docTypeSystemId;


    /**
     * The output format associated with this serializer. This will never
     * be a null reference. If no format was passed to the constructor,
     * the default one for this document type will be used. The format
     * object is never changed by the serializer.
     */
    protected OutputFormat _format;


    /**
     * The printer used for printing text parts.
     */
    protected Printer _printer;


    /**
     * True if indenting printer.
     */
    protected boolean _indenting;

    /** Temporary buffer to store character data */
    protected final StringBuffer fStrBuffer = new StringBuffer(40);

    /**
     * The underlying writer.
     */
    private Writer _writer;


    /**
     * The output stream.
     */
    private OutputStream _output;

    /** Current node that is being processed */
    protected Node fCurrentNode = null;



    //--------------------------------//
    // Constructor and initialization //
    //--------------------------------//


    /**
     * Protected constructor can only be used by derived class.
253 * Must initialize the serializer before serializing any document, 254 * by calling {@link #setOutputCharStream} or {@link #setOutputByteStream} 255 * first 256 */ 257 protected BaseMarkupSerializer( OutputFormat format ) 258 { 259 int i; 260 261 _elementStates = new ElementState[ 10 ]; 262 for ( i = 0 ; i < _elementStates.length ; ++i ) 263 _elementStates[ i ] = new ElementState(); 264 _format = format; 265 } 266 267 268 public DocumentHandler asDocumentHandler() 269 throws IOException 270 { 271 prepare(); 272 return this; 273 } 274 275 276 public ContentHandler asContentHandler() 277 throws IOException 278 { 279 prepare(); 280 return this; 281 } 282 283 284 public DOMSerializer asDOMSerializer() 285 throws IOException 286 { 287 prepare(); 288 return this; 289 } 290 291 292 public void setOutputByteStream( OutputStream output ) 293 { 294 if ( output == null ) { 295 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 296 "ArgumentIsNull", new Object[]{"output"}); 297 throw new NullPointerException(msg); 298 } 299 _output = output; 300 _writer = null; 301 reset(); 302 } 303 304 305 public void setOutputCharStream( Writer writer ) 306 { 307 if ( writer == null ) { 308 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 309 "ArgumentIsNull", new Object[]{"writer"}); 310 throw new NullPointerException(msg); 311 } 312 _writer = writer; 313 _output = null; 314 reset(); 315 } 316 317 318 public void setOutputFormat( OutputFormat format ) 319 { 320 if ( format == null ) { 321 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 322 "ArgumentIsNull", new Object[]{"format"}); 323 throw new NullPointerException(msg); 324 } 325 _format = format; 326 reset(); 327 } 328 329 330 public boolean reset() 331 { 332 if ( _elementStateCount > 1 ) { 333 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 334 "ResetInMiddle", null); 335 throw new 
IllegalStateException(msg); 336 } 337 _prepared = false; 338 fCurrentNode = null; 339 fStrBuffer.setLength(0); 340 return true; 341 } 342 343 protected void cleanup() { 344 fCurrentNode = null; 345 } 346 347 protected void prepare() 348 throws IOException 349 { 350 if ( _prepared ) 351 return; 352 353 if ( _writer == null && _output == null ) { 354 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 355 "NoWriterSupplied", null); 356 throw new IOException(msg); 357 } 358 // If the output stream has been set, use it to construct 359 // the writer. It is possible that the serializer has been 360 // reused with the same output stream and different encoding. 361 362 _encodingInfo = _format.getEncodingInfo(); 363 364 if ( _output != null ) { 365 _writer = _encodingInfo.getWriter(_output); 366 } 367 368 if ( _format.getIndenting() ) { 369 _indenting = true; 370 _printer = new IndentPrinter( _writer, _format ); 371 } else { 372 _indenting = false; 373 _printer = new Printer( _writer, _format ); 374 } 375 376 ElementState state; 377 378 _elementStateCount = 0; 379 state = _elementStates[ 0 ]; 380 state.namespaceURI = null; 381 state.localName = null; 382 state.rawName = null; 383 state.preserveSpace = _format.getPreserveSpace(); 384 state.empty = true; 385 state.afterElement = false; 386 state.afterComment = false; 387 state.doCData = state.inCData = false; 388 state.prefixes = null; 389 390 _docTypePublicId = _format.getDoctypePublic(); 391 _docTypeSystemId = _format.getDoctypeSystem(); 392 _started = false; 393 _prepared = true; 394 } 395 396 397 398 //----------------------------------// 399 // DOM document serializing methods // 400 //----------------------------------// 401 402 403 /** 404 * Serializes the DOM element using the previously specified 405 * writer and output format. Throws an exception only if 406 * an I/O exception occured while serializing. 
407 * 408 * @param elem The element to serialize 409 * @throws IOException An I/O exception occured while 410 * serializing 411 */ 412 public void serialize( Element elem ) 413 throws IOException 414 { 415 reset(); 416 prepare(); 417 serializeNode( elem ); 418 cleanup(); 419 _printer.flush(); 420 if ( _printer.getException() != null ) 421 throw _printer.getException(); 422 } 423 424 /** 425 * Serializes a node using the previously specified 426 * writer and output format. Throws an exception only if 427 * an I/O exception occured while serializing. 428 * 429 * @param node Node to serialize 430 * @throws IOException An I/O exception occured while serializing 431 */ 432 public void serialize( Node node ) throws IOException { 433 reset(); 434 prepare(); 435 serializeNode( node ); 436 //Print any PIs and Comments which appeared in 'node' 437 serializePreRoot(); 438 _printer.flush(); 439 if ( _printer.getException() != null ) 440 throw _printer.getException(); 441 } 442 443 /** 444 * Serializes the DOM document fragmnt using the previously specified 445 * writer and output format. Throws an exception only if 446 * an I/O exception occured while serializing. 447 * 448 * @param frag The document fragment to serialize 449 * @throws IOException An I/O exception occured while 450 * serializing 451 */ 452 public void serialize( DocumentFragment frag ) 453 throws IOException 454 { 455 reset(); 456 prepare(); 457 serializeNode( frag ); 458 cleanup(); 459 _printer.flush(); 460 if ( _printer.getException() != null ) 461 throw _printer.getException(); 462 } 463 464 465 /** 466 * Serializes the DOM document using the previously specified 467 * writer and output format. Throws an exception only if 468 * an I/O exception occured while serializing. 
469 * 470 * @param doc The document to serialize 471 * @throws IOException An I/O exception occured while 472 * serializing 473 */ 474 public void serialize( Document doc ) 475 throws IOException 476 { 477 reset(); 478 prepare(); 479 serializeNode( doc ); 480 serializePreRoot(); 481 cleanup(); 482 _printer.flush(); 483 if ( _printer.getException() != null ) 484 throw _printer.getException(); 485 } 486 487 488 //------------------------------------------// 489 // SAX document handler serializing methods // 490 //------------------------------------------// 491 492 493 public void startDocument() 494 throws SAXException 495 { 496 try { 497 prepare(); 498 } catch ( IOException except ) { 499 throw new SAXException( except.toString() ); 500 } 501 // Nothing to do here. All the magic happens in startDocument(String) 502 } 503 504 505 public void characters( char[] chars, int start, int length ) 506 throws SAXException 507 { 508 ElementState state; 509 510 try { 511 state = content(); 512 513 // Check if text should be print as CDATA section or unescaped 514 // based on elements listed in the output format (the element 515 // state) or whether we are inside a CDATA section or entity. 516 517 if ( state.inCData || state.doCData ) { 518 int saveIndent; 519 520 // Print a CDATA section. The text is not escaped, but ']]>' 521 // appearing in the code must be identified and dealt with. 522 // The contents of a text node is considered space preserving. 523 if ( ! 
state.inCData ) { 524 _printer.printText( "<![CDATA[" ); 525 state.inCData = true; 526 } 527 saveIndent = _printer.getNextIndent(); 528 _printer.setNextIndent( 0 ); 529 char ch; 530 final int end = start + length; 531 for ( int index = start ; index < end; ++index ) { 532 ch = chars[index]; 533 if ( ch == ']' && index + 2 < end && 534 chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) { 535 _printer.printText("]]]]><![CDATA[>"); 536 index +=2; 537 continue; 538 } 539 if (!XMLChar.isValid(ch)) { 540 // check if it is surrogate 541 if (++index < end) { 542 surrogates(ch, chars[index],true); 543 } 544 else { 545 fatalError("The character '"+ch+"' is an invalid XML character"); 546 } 547 continue; 548 } 549 if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0x7F ) || 550 ch == '\n' || ch == '\r' || ch == '\t' ) { 551 _printer.printText(ch); 552 } 553 else { 554 // The character is not printable -- split CDATA section 555 _printer.printText("]]>&#x"); 556 _printer.printText(Integer.toHexString(ch)); 557 _printer.printText(";<![CDATA["); 558 } 559 } 560 _printer.setNextIndent( saveIndent ); 561 562 } else { 563 564 int saveIndent; 565 566 if ( state.preserveSpace ) { 567 // If preserving space then hold of indentation so no 568 // excessive spaces are printed at line breaks, escape 569 // the text content without replacing spaces and print 570 // the text breaking only at line breaks. 
571 saveIndent = _printer.getNextIndent(); 572 _printer.setNextIndent( 0 ); 573 printText( chars, start, length, true, state.unescaped ); 574 _printer.setNextIndent( saveIndent ); 575 } else { 576 printText( chars, start, length, false, state.unescaped ); 577 } 578 } 579 } catch ( IOException except ) { 580 throw new SAXException( except ); 581 } 582 } 583 584 585 public void ignorableWhitespace( char[] chars, int start, int length ) 586 throws SAXException 587 { 588 int i; 589 590 try { 591 content(); 592 593 // Print ignorable whitespaces only when indenting, after 594 // all they are indentation. Cancel the indentation to 595 // not indent twice. 596 if ( _indenting ) { 597 _printer.setThisIndent( 0 ); 598 for ( i = start ; length-- > 0 ; ++i ) 599 _printer.printText( chars[ i ] ); 600 } 601 } catch ( IOException except ) { 602 throw new SAXException( except ); 603 } 604 } 605 606 607 public final void processingInstruction( String target, String code ) 608 throws SAXException 609 { 610 try { 611 processingInstructionIO( target, code ); 612 } catch ( IOException except ) { 613 throw new SAXException( except ); 614 } 615 } 616 617 public void processingInstructionIO( String target, String code ) 618 throws IOException 619 { 620 int index; 621 ElementState state; 622 623 state = content(); 624 625 // Create the processing instruction textual representation. 626 // Make sure we don't have '?>' inside either target or code. 627 index = target.indexOf( "?>" ); 628 if ( index >= 0 ) 629 fStrBuffer.append( "<?" ).append( target.substring( 0, index ) ); 630 else 631 fStrBuffer.append( "<?" ).append( target ); 632 if ( code != null ) { 633 fStrBuffer.append( ' ' ); 634 index = code.indexOf( "?>" ); 635 if ( index >= 0 ) 636 fStrBuffer.append( code.substring( 0, index ) ); 637 else 638 fStrBuffer.append( code ); 639 } 640 fStrBuffer.append( "?>" ); 641 642 // If before the root element (or after it), do not print 643 // the PI directly but place it in the pre-root vector. 
644 if ( isDocumentState() ) { 645 if ( _preRoot == null ) 646 _preRoot = new ArrayList<>(); 647 _preRoot.add( fStrBuffer.toString() ); 648 } else { 649 _printer.indent(); 650 printText( fStrBuffer.toString(), true, true ); 651 _printer.unindent(); 652 if ( _indenting ) 653 state.afterElement = true; 654 } 655 656 fStrBuffer.setLength(0); 657 } 658 659 660 public void comment( char[] chars, int start, int length ) 661 throws SAXException 662 { 663 try { 664 comment( new String( chars, start, length ) ); 665 } catch ( IOException except ) { 666 throw new SAXException( except ); 667 } 668 } 669 670 671 public void comment( String text ) 672 throws IOException 673 { 674 int index; 675 ElementState state; 676 677 if ( _format.getOmitComments() ) 678 return; 679 680 state = content(); 681 // Create the processing comment textual representation. 682 // Make sure we don't have '-->' inside the comment. 683 index = text.indexOf( "-->" ); 684 if ( index >= 0 ) 685 fStrBuffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" ); 686 else 687 fStrBuffer.append( "<!--" ).append( text ).append( "-->" ); 688 689 // If before the root element (or after it), do not print 690 // the comment directly but place it in the pre-root vector. 691 if ( isDocumentState() ) { 692 if ( _preRoot == null ) 693 _preRoot = new ArrayList<>(); 694 _preRoot.add( fStrBuffer.toString() ); 695 } else { 696 // Indent this element on a new line if the first 697 // content of the parent element or immediately 698 // following an element. 699 if ( _indenting && ! 
state.preserveSpace) 700 _printer.breakLine(); 701 _printer.indent(); 702 printText( fStrBuffer.toString(), true, true ); 703 _printer.unindent(); 704 if ( _indenting ) 705 state.afterElement = true; 706 } 707 708 fStrBuffer.setLength(0); 709 state.afterComment = true; 710 state.afterElement = false; 711 } 712 713 714 public void startCDATA() 715 { 716 ElementState state; 717 718 state = getElementState(); 719 state.doCData = true; 720 } 721 722 723 public void endCDATA() 724 { 725 ElementState state; 726 727 state = getElementState(); 728 state.doCData = false; 729 } 730 731 732 public void startNonEscaping() 733 { 734 ElementState state; 735 736 state = getElementState(); 737 state.unescaped = true; 738 } 739 740 741 public void endNonEscaping() 742 { 743 ElementState state; 744 745 state = getElementState(); 746 state.unescaped = false; 747 } 748 749 750 public void startPreserving() 751 { 752 ElementState state; 753 754 state = getElementState(); 755 state.preserveSpace = true; 756 } 757 758 759 public void endPreserving() 760 { 761 ElementState state; 762 763 state = getElementState(); 764 state.preserveSpace = false; 765 } 766 767 768 /** 769 * Called at the end of the document to wrap it up. 770 * Will flush the output stream and throw an exception 771 * if any I/O error occured while serializing. 772 * 773 * @throws SAXException An I/O exception occured during 774 * serializing 775 */ 776 public void endDocument() 777 throws SAXException 778 { 779 try { 780 // Print all the elements accumulated outside of 781 // the root element. 782 serializePreRoot(); 783 // Flush the output, this is necessary for fStrBuffered output. 784 _printer.flush(); 785 } catch ( IOException except ) { 786 throw new SAXException( except ); 787 } 788 } 789 790 791 public void startEntity( String name ) 792 { 793 // ??? 794 } 795 796 797 public void endEntity( String name ) 798 { 799 // ??? 
800 } 801 802 803 public void setDocumentLocator( Locator locator ) 804 { 805 // Nothing to do 806 } 807 808 809 //-----------------------------------------// 810 // SAX content handler serializing methods // 811 //-----------------------------------------// 812 813 814 public void skippedEntity ( String name ) 815 throws SAXException 816 { 817 try { 818 endCDATA(); 819 content(); 820 _printer.printText( '&' ); 821 _printer.printText( name ); 822 _printer.printText( ';' ); 823 } catch ( IOException except ) { 824 throw new SAXException( except ); 825 } 826 } 827 828 829 public void startPrefixMapping( String prefix, String uri ) 830 throws SAXException 831 { 832 if ( _prefixes == null ) 833 _prefixes = new HashMap<>(); 834 _prefixes.put( uri, prefix == null ? "" : prefix ); 835 } 836 837 838 public void endPrefixMapping( String prefix ) 839 throws SAXException 840 { 841 } 842 843 844 //------------------------------------------// 845 // SAX DTD/Decl handler serializing methods // 846 //------------------------------------------// 847 848 849 public final void startDTD( String name, String publicId, String systemId ) 850 throws SAXException 851 { 852 try { 853 _printer.enterDTD(); 854 _docTypePublicId = publicId; 855 _docTypeSystemId = systemId; 856 857 } catch ( IOException except ) { 858 throw new SAXException( except ); 859 } 860 } 861 862 863 public void endDTD() 864 { 865 // Nothing to do here, all the magic occurs in startDocument(String). 
866 } 867 868 869 public void elementDecl( String name, String model ) 870 throws SAXException 871 { 872 try { 873 _printer.enterDTD(); 874 _printer.printText( "<!ELEMENT " ); 875 _printer.printText( name ); 876 _printer.printText( ' ' ); 877 _printer.printText( model ); 878 _printer.printText( '>' ); 879 if ( _indenting ) 880 _printer.breakLine(); 881 } catch ( IOException except ) { 882 throw new SAXException( except ); 883 } 884 } 885 886 887 public void attributeDecl( String eName, String aName, String type, 888 String valueDefault, String value ) 889 throws SAXException 890 { 891 try { 892 _printer.enterDTD(); 893 _printer.printText( "<!ATTLIST " ); 894 _printer.printText( eName ); 895 _printer.printText( ' ' ); 896 _printer.printText( aName ); 897 _printer.printText( ' ' ); 898 _printer.printText( type ); 899 if ( valueDefault != null ) { 900 _printer.printText( ' ' ); 901 _printer.printText( valueDefault ); 902 } 903 if ( value != null ) { 904 _printer.printText( " \"" ); 905 printEscaped( value ); 906 _printer.printText( '"' ); 907 } 908 _printer.printText( '>' ); 909 if ( _indenting ) 910 _printer.breakLine(); 911 } catch ( IOException except ) { 912 throw new SAXException( except ); 913 } 914 } 915 916 917 public void internalEntityDecl( String name, String value ) 918 throws SAXException 919 { 920 try { 921 _printer.enterDTD(); 922 _printer.printText( "<!ENTITY " ); 923 _printer.printText( name ); 924 _printer.printText( " \"" ); 925 printEscaped( value ); 926 _printer.printText( "\">" ); 927 if ( _indenting ) 928 _printer.breakLine(); 929 } catch ( IOException except ) { 930 throw new SAXException( except ); 931 } 932 } 933 934 935 public void externalEntityDecl( String name, String publicId, String systemId ) 936 throws SAXException 937 { 938 try { 939 _printer.enterDTD(); 940 unparsedEntityDecl( name, publicId, systemId, null ); 941 } catch ( IOException except ) { 942 throw new SAXException( except ); 943 } 944 } 945 946 947 public void 
unparsedEntityDecl( String name, String publicId, 948 String systemId, String notationName ) 949 throws SAXException 950 { 951 try { 952 _printer.enterDTD(); 953 if ( publicId == null ) { 954 _printer.printText( "<!ENTITY " ); 955 _printer.printText( name ); 956 _printer.printText( " SYSTEM " ); 957 printDoctypeURL( systemId ); 958 } else { 959 _printer.printText( "<!ENTITY " ); 960 _printer.printText( name ); 961 _printer.printText( " PUBLIC " ); 962 printDoctypeURL( publicId ); 963 _printer.printText( ' ' ); 964 printDoctypeURL( systemId ); 965 } 966 if ( notationName != null ) { 967 _printer.printText( " NDATA " ); 968 _printer.printText( notationName ); 969 } 970 _printer.printText( '>' ); 971 if ( _indenting ) 972 _printer.breakLine(); 973 } catch ( IOException except ) { 974 throw new SAXException( except ); 975 } 976 } 977 978 979 public void notationDecl( String name, String publicId, String systemId ) 980 throws SAXException 981 { 982 try { 983 _printer.enterDTD(); 984 if ( publicId != null ) { 985 _printer.printText( "<!NOTATION " ); 986 _printer.printText( name ); 987 _printer.printText( " PUBLIC " ); 988 printDoctypeURL( publicId ); 989 if ( systemId != null ) { 990 _printer.printText( ' ' ); 991 printDoctypeURL( systemId ); 992 } 993 } else { 994 _printer.printText( "<!NOTATION " ); 995 _printer.printText( name ); 996 _printer.printText( " SYSTEM " ); 997 printDoctypeURL( systemId ); 998 } 999 _printer.printText( '>' ); 1000 if ( _indenting ) 1001 _printer.breakLine(); 1002 } catch ( IOException except ) { 1003 throw new SAXException( except ); 1004 } 1005 } 1006 1007 1008 //------------------------------------------// 1009 // Generic node serializing methods methods // 1010 //------------------------------------------// 1011 1012 1013 /** 1014 * Serialize the DOM node. This method is shared across XML, HTML and XHTML 1015 * serializers and the differences are masked out in a separate {@link 1016 * #serializeElement}. 
1017 * 1018 * @param node The node to serialize 1019 * @see #serializeElement 1020 * @throws IOException An I/O exception occured while 1021 * serializing 1022 */ 1023 @SuppressWarnings("fallthrough") // by design at case Node.DOCUMENT_FRAGMENT_NODE 1024 protected void serializeNode( Node node ) 1025 throws IOException 1026 { 1027 fCurrentNode = node; 1028 1029 // Based on the node type call the suitable SAX handler. 1030 // Only comments entities and documents which are not 1031 // handled by SAX are serialized directly. 1032 switch ( node.getNodeType() ) { 1033 case Node.TEXT_NODE : { 1034 String text; 1035 1036 text = node.getNodeValue(); 1037 if ( text != null ) { 1038 if (fDOMFilter !=null && 1039 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_TEXT)!= 0) { 1040 short code = fDOMFilter.acceptNode(node); 1041 switch (code) { 1042 case NodeFilter.FILTER_REJECT: 1043 case NodeFilter.FILTER_SKIP: { 1044 break; 1045 } 1046 default: { 1047 characters(text); 1048 } 1049 } 1050 } 1051 else if ( !_indenting || getElementState().preserveSpace 1052 || (text.replace('\n',' ').trim().length() != 0)) 1053 characters( text ); 1054 1055 } 1056 break; 1057 } 1058 1059 case Node.CDATA_SECTION_NODE : { 1060 String text = node.getNodeValue(); 1061 if ((features & DOMSerializerImpl.CDATA) != 0) { 1062 if (text != null) { 1063 if (fDOMFilter != null 1064 && (fDOMFilter.getWhatToShow() 1065 & NodeFilter.SHOW_CDATA_SECTION) 1066 != 0) { 1067 short code = fDOMFilter.acceptNode(node); 1068 switch (code) { 1069 case NodeFilter.FILTER_REJECT : 1070 case NodeFilter.FILTER_SKIP : 1071 { 1072 // skip the CDATA node 1073 return; 1074 } 1075 default : 1076 { 1077 //fall through.. 1078 } 1079 } 1080 } 1081 startCDATA(); 1082 characters(text); 1083 endCDATA(); 1084 } 1085 } else { 1086 // transform into a text node 1087 characters(text); 1088 } 1089 break; 1090 } 1091 case Node.COMMENT_NODE : { 1092 String text; 1093 1094 if ( ! 
_format.getOmitComments() ) { 1095 text = node.getNodeValue(); 1096 if ( text != null ) { 1097 1098 if (fDOMFilter !=null && 1099 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_COMMENT)!= 0) { 1100 short code = fDOMFilter.acceptNode(node); 1101 switch (code) { 1102 case NodeFilter.FILTER_REJECT: 1103 case NodeFilter.FILTER_SKIP: { 1104 // skip the comment node 1105 return; 1106 } 1107 default: { 1108 // fall through 1109 } 1110 } 1111 } 1112 comment( text ); 1113 } 1114 } 1115 break; 1116 } 1117 1118 case Node.ENTITY_REFERENCE_NODE : { 1119 Node child; 1120 1121 endCDATA(); 1122 content(); 1123 1124 if (((features & DOMSerializerImpl.ENTITIES) != 0) 1125 || (node.getFirstChild() == null)) { 1126 if (fDOMFilter !=null && 1127 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE)!= 0) { 1128 short code = fDOMFilter.acceptNode(node); 1129 switch (code) { 1130 case NodeFilter.FILTER_REJECT:{ 1131 return; // remove the node 1132 } 1133 case NodeFilter.FILTER_SKIP: { 1134 child = node.getFirstChild(); 1135 while ( child != null ) { 1136 serializeNode( child ); 1137 child = child.getNextSibling(); 1138 } 1139 return; 1140 } 1141 1142 default: { 1143 // fall through 1144 } 1145 } 1146 } 1147 checkUnboundNamespacePrefixedNode(node); 1148 1149 _printer.printText("&"); 1150 _printer.printText(node.getNodeName()); 1151 _printer.printText(";"); 1152 } 1153 else { 1154 child = node.getFirstChild(); 1155 while ( child != null ) { 1156 serializeNode( child ); 1157 child = child.getNextSibling(); 1158 } 1159 } 1160 1161 break; 1162 } 1163 1164 case Node.PROCESSING_INSTRUCTION_NODE : { 1165 1166 if (fDOMFilter !=null && 1167 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_PROCESSING_INSTRUCTION)!= 0) { 1168 short code = fDOMFilter.acceptNode(node); 1169 switch (code) { 1170 case NodeFilter.FILTER_REJECT: 1171 case NodeFilter.FILTER_SKIP: { 1172 return; // skip this node 1173 } 1174 default: { // fall through 1175 } 1176 } 1177 } 1178 processingInstructionIO( 
node.getNodeName(), node.getNodeValue() ); 1179 break; 1180 } 1181 case Node.ELEMENT_NODE : { 1182 1183 if (fDOMFilter !=null && 1184 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ELEMENT)!= 0) { 1185 short code = fDOMFilter.acceptNode(node); 1186 switch (code) { 1187 case NodeFilter.FILTER_REJECT: { 1188 return; 1189 } 1190 case NodeFilter.FILTER_SKIP: { 1191 Node child = node.getFirstChild(); 1192 while ( child != null ) { 1193 serializeNode( child ); 1194 child = child.getNextSibling(); 1195 } 1196 return; // skip this node 1197 } 1198 1199 default: { // fall through 1200 } 1201 } 1202 } 1203 serializeElement( (Element) node ); 1204 break; 1205 } 1206 case Node.DOCUMENT_NODE : { 1207 DocumentType docType; 1208 1209 serializeDocument(); 1210 1211 // If there is a document type, use the SAX events to 1212 // serialize it. 1213 docType = ( (Document) node ).getDoctype(); 1214 if (docType != null) { 1215 // DOM Level 2 (or higher) 1216 try { 1217 String internal; 1218 1219 _printer.enterDTD(); 1220 _docTypePublicId = docType.getPublicId(); 1221 _docTypeSystemId = docType.getSystemId(); 1222 internal = docType.getInternalSubset(); 1223 if ( internal != null && internal.length() > 0 ) 1224 _printer.printText( internal ); 1225 endDTD(); 1226 } catch (Exception e) { 1227 // ignore 1228 _printer.enterDTD(); 1229 _docTypePublicId = null; 1230 _docTypeSystemId = null; 1231 endDTD(); 1232 } 1233 serializeDTD(docType.getName()); 1234 1235 } 1236 _started = true; 1237 1238 // !! Fall through 1239 } 1240 case Node.DOCUMENT_FRAGMENT_NODE : { 1241 Node child; 1242 1243 // By definition this will happen if the node is a document, 1244 // document fragment, etc. Just serialize its contents. It will 1245 // work well for other nodes that we do not know how to serialize. 
1246 child = node.getFirstChild(); 1247 while ( child != null ) { 1248 serializeNode( child ); 1249 child = child.getNextSibling(); 1250 } 1251 break; 1252 } 1253 1254 default: 1255 break; 1256 } 1257 } 1258 1259 1260 /* Serializes XML Declaration, according to 'xml-declaration' property. 1261 */ 1262 protected void serializeDocument()throws IOException { 1263 int i; 1264 1265 String dtd = _printer.leaveDTD(); 1266 if (! _started) { 1267 1268 if (! _format.getOmitXMLDeclaration()) { 1269 StringBuffer buffer; 1270 1271 // Serialize the document declaration appreaing at the head 1272 // of very XML document (unless asked not to). 1273 buffer = new StringBuffer( "<?xml version=\"" ); 1274 if (_format.getVersion() != null) 1275 buffer.append( _format.getVersion() ); 1276 else 1277 buffer.append( "1.0" ); 1278 buffer.append( '"' ); 1279 String format_encoding = _format.getEncoding(); 1280 if (format_encoding != null) { 1281 buffer.append( " encoding=\"" ); 1282 buffer.append( format_encoding ); 1283 buffer.append( '"' ); 1284 } 1285 if (_format.getStandalone() && _docTypeSystemId == null && 1286 _docTypePublicId == null) 1287 buffer.append( " standalone=\"yes\"" ); 1288 buffer.append( "?>" ); 1289 _printer.printText( buffer ); 1290 _printer.breakLine(); 1291 } 1292 } 1293 1294 // Always serialize these, even if not te first root element. 1295 serializePreRoot(); 1296 1297 } 1298 1299 /* Serializes DTD, if present. 1300 */ 1301 protected void serializeDTD(String name) throws IOException{ 1302 1303 String dtd = _printer.leaveDTD(); 1304 if (! _format.getOmitDocumentType()) { 1305 if (_docTypeSystemId != null) { 1306 // System identifier must be specified to print DOCTYPE. 1307 // If public identifier is specified print 'PUBLIC 1308 // <public> <system>', if not, print 'SYSTEM <system>'. 
1309 _printer.printText( "<!DOCTYPE " ); 1310 _printer.printText( name ); 1311 if (_docTypePublicId != null) { 1312 _printer.printText( " PUBLIC " ); 1313 printDoctypeURL( _docTypePublicId ); 1314 if (_indenting) { 1315 _printer.breakLine(); 1316 for (int i = 0 ; i < 18 + name.length() ; ++i) 1317 _printer.printText( " " ); 1318 } else 1319 _printer.printText( " " ); 1320 printDoctypeURL( _docTypeSystemId ); 1321 } else { 1322 _printer.printText( " SYSTEM " ); 1323 printDoctypeURL( _docTypeSystemId ); 1324 } 1325 1326 // If we accumulated any DTD contents while printing. 1327 // this would be the place to print it. 1328 if (dtd != null && dtd.length() > 0) { 1329 _printer.printText( " [" ); 1330 printText( dtd, true, true ); 1331 _printer.printText( ']' ); 1332 } 1333 1334 _printer.printText( ">" ); 1335 _printer.breakLine(); 1336 } else if (dtd != null && dtd.length() > 0) { 1337 _printer.printText( "<!DOCTYPE " ); 1338 _printer.printText( name ); 1339 _printer.printText( " [" ); 1340 printText( dtd, true, true ); 1341 _printer.printText( "]>" ); 1342 _printer.breakLine(); 1343 } 1344 } 1345 } 1346 1347 1348 /** 1349 * Must be called by a method about to print any type of content. 1350 * If the element was just opened, the opening tag is closed and 1351 * will be matched to a closing tag. Returns the current element 1352 * state with <tt>empty</tt> and <tt>afterElement</tt> set to false. 1353 * 1354 * @return The current element state 1355 * @throws IOException An I/O exception occurred while 1356 * serializing 1357 */ 1358 protected ElementState content() 1359 throws IOException 1360 { 1361 ElementState state; 1362 1363 state = getElementState(); 1364 if ( ! isDocumentState() ) { 1365 // Need to close CData section first 1366 if ( state.inCData && ! 
state.doCData ) { 1367 _printer.printText( "]]>" ); 1368 state.inCData = false; 1369 } 1370 // If this is the first content in the element, 1371 // change the state to not-empty and close the 1372 // opening element tag. 1373 if ( state.empty ) { 1374 _printer.printText( '>' ); 1375 state.empty = false; 1376 } 1377 // Except for one content type, all of them 1378 // are not last element. That one content 1379 // type will take care of itself. 1380 state.afterElement = false; 1381 // Except for one content type, all of them 1382 // are not last comment. That one content 1383 // type will take care of itself. 1384 state.afterComment = false; 1385 } 1386 return state; 1387 } 1388 1389 1390 /** 1391 * Called to print the text contents in the prevailing element format. 1392 * Since this method is capable of printing text as CDATA, it is used 1393 * for that purpose as well. White space handling is determined by the 1394 * current element state. In addition, the output format can dictate 1395 * whether the text is printed as CDATA or unescaped. 1396 * 1397 * @param text The text to print 1398 * @throws IOException An I/O exception occured while 1399 * serializing 1400 */ 1401 protected void characters( String text ) 1402 throws IOException 1403 { 1404 ElementState state; 1405 1406 state = content(); 1407 // Check if text should be print as CDATA section or unescaped 1408 // based on elements listed in the output format (the element 1409 // state) or whether we are inside a CDATA section or entity. 1410 1411 if ( state.inCData || state.doCData ) { 1412 // Print a CDATA section. The text is not escaped, but ']]>' 1413 // appearing in the code must be identified and dealt with. 1414 // The contents of a text node is considered space preserving. 1415 if ( ! 
state.inCData ) { 1416 _printer.printText("<![CDATA["); 1417 state.inCData = true; 1418 } 1419 int saveIndent = _printer.getNextIndent(); 1420 _printer.setNextIndent( 0 ); 1421 printCDATAText( text); 1422 _printer.setNextIndent( saveIndent ); 1423 1424 } else { 1425 1426 int saveIndent; 1427 1428 if ( state.preserveSpace ) { 1429 // If preserving space then hold of indentation so no 1430 // excessive spaces are printed at line breaks, escape 1431 // the text content without replacing spaces and print 1432 // the text breaking only at line breaks. 1433 saveIndent = _printer.getNextIndent(); 1434 _printer.setNextIndent( 0 ); 1435 printText( text, true, state.unescaped ); 1436 _printer.setNextIndent( saveIndent ); 1437 } else { 1438 printText( text, false, state.unescaped ); 1439 } 1440 } 1441 } 1442 1443 1444 /** 1445 * Returns the suitable entity reference for this character value, 1446 * or null if no such entity exists. Calling this method with <tt>'&'</tt> 1447 * will return <tt>"&amp;"</tt>. 1448 * 1449 * @param ch Character value 1450 * @return Character entity name, or null 1451 */ 1452 protected abstract String getEntityRef( int ch ); 1453 1454 1455 /** 1456 * Called to serializee the DOM element. The element is serialized based on 1457 * the serializer's method (XML, HTML, XHTML). 1458 * 1459 * @param elem The element to serialize 1460 * @throws IOException An I/O exception occured while 1461 * serializing 1462 */ 1463 protected abstract void serializeElement( Element elem ) 1464 throws IOException; 1465 1466 1467 /** 1468 * Comments and PIs cannot be serialized before the root element, 1469 * because the root element serializes the document type, which 1470 * generally comes first. Instead such PIs and comments are 1471 * accumulated inside a vector and serialized by calling this 1472 * method. Will be called when the root element is serialized 1473 * and when the document finished serializing. 
1474 * 1475 * @throws IOException An I/O exception occured while 1476 * serializing 1477 */ 1478 protected void serializePreRoot() 1479 throws IOException 1480 { 1481 int i; 1482 1483 if ( _preRoot != null ) { 1484 for ( i = 0 ; i < _preRoot.size() ; ++i ) { 1485 printText(_preRoot.get( i ), true, true ); 1486 if ( _indenting ) 1487 _printer.breakLine(); 1488 } 1489 _preRoot.clear(); 1490 } 1491 } 1492 1493 1494 //---------------------------------------------// 1495 // Text pretty printing and formatting methods // 1496 //---------------------------------------------// 1497 1498 protected void printCDATAText( String text ) throws IOException { 1499 int length = text.length(); 1500 char ch; 1501 1502 for ( int index = 0 ; index < length; ++index ) { 1503 ch = text.charAt( index ); 1504 if (ch == ']' 1505 && index + 2 < length 1506 && text.charAt(index + 1) == ']' 1507 && text.charAt(index + 2) == '>') { // check for ']]>' 1508 if (fDOMErrorHandler != null) { 1509 // REVISIT: this means that if DOM Error handler is not registered we don't report any 1510 // fatal errors and might serialize not wellformed document 1511 if ((features & DOMSerializerImpl.SPLITCDATA) == 0) { 1512 String msg = DOMMessageFormatter.formatMessage( 1513 DOMMessageFormatter.SERIALIZER_DOMAIN, 1514 "EndingCDATA", 1515 null); 1516 if ((features & DOMSerializerImpl.WELLFORMED) != 0) { 1517 // issue fatal error 1518 modifyDOMError(msg, DOMError.SEVERITY_FATAL_ERROR, "wf-invalid-character", fCurrentNode); 1519 fDOMErrorHandler.handleError(fDOMError); 1520 throw new LSException(LSException.SERIALIZE_ERR, msg); 1521 } 1522 // issue error 1523 modifyDOMError(msg, DOMError.SEVERITY_ERROR, "cdata-section-not-splitted", fCurrentNode); 1524 if (!fDOMErrorHandler.handleError(fDOMError)) { 1525 throw new LSException(LSException.SERIALIZE_ERR, msg); 1526 } 1527 } else { 1528 // issue warning 1529 String msg = 1530 DOMMessageFormatter.formatMessage( 1531 DOMMessageFormatter.SERIALIZER_DOMAIN, 1532 
"SplittingCDATA", 1533 null); 1534 modifyDOMError( 1535 msg, 1536 DOMError.SEVERITY_WARNING, 1537 null, fCurrentNode); 1538 fDOMErrorHandler.handleError(fDOMError); 1539 } 1540 } 1541 // split CDATA section 1542 _printer.printText("]]]]><![CDATA[>"); 1543 index += 2; 1544 continue; 1545 } 1546 1547 if (!XMLChar.isValid(ch)) { 1548 // check if it is surrogate 1549 if (++index <length) { 1550 surrogates(ch, text.charAt(index),true); 1551 } 1552 else { 1553 fatalError("The character '"+ch+"' is an invalid XML character"); 1554 } 1555 continue; 1556 } 1557 if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0x7F ) || 1558 ch == '\n' || ch == '\r' || ch == '\t' ) { 1559 _printer.printText(ch); 1560 } 1561 else { 1562 1563 // The character is not printable -- split CDATA section 1564 _printer.printText("]]>&#x"); 1565 _printer.printText(Integer.toHexString(ch)); 1566 _printer.printText(";<![CDATA["); 1567 } 1568 } 1569 } 1570 1571 1572 protected void surrogates(int high, int low, boolean inContent) throws IOException{ 1573 if (XMLChar.isHighSurrogate(high)) { 1574 if (!XMLChar.isLowSurrogate(low)) { 1575 //Invalid XML 1576 fatalError("The character '"+(char)low+"' is an invalid XML character"); 1577 } 1578 else { 1579 int supplemental = XMLChar.supplemental((char)high, (char)low); 1580 if (!XMLChar.isValid(supplemental)) { 1581 //Invalid XML 1582 fatalError("The character '"+(char)supplemental+"' is an invalid XML character"); 1583 } 1584 else { 1585 if (inContent && content().inCData) { 1586 _printer.printText("]]>&#x"); 1587 _printer.printText(Integer.toHexString(supplemental)); 1588 _printer.printText(";<![CDATA["); 1589 } 1590 else { 1591 printHex(supplemental); 1592 } 1593 } 1594 } 1595 } else { 1596 fatalError("The character '"+(char)high+"' is an invalid XML character"); 1597 } 1598 1599 } 1600 1601 /** 1602 * Called to print additional text with whitespace handling. 
1603 * If spaces are preserved, the text is printed as if by calling 1604 * {@link #printText(String,boolean,boolean)} with a call to {@link Printer#breakLine} 1605 * for each new line. If spaces are not preserved, the text is 1606 * broken at space boundaries if longer than the line width; 1607 * Multiple spaces are printed as such, but spaces at beginning 1608 * of line are removed. 1609 * 1610 * @param chars The text to print 1611 * @param start The start offset 1612 * @param length The number of characters 1613 * @param preserveSpace Space preserving flag 1614 * @param unescaped Print unescaped 1615 */ 1616 protected void printText( char[] chars, int start, int length, 1617 boolean preserveSpace, boolean unescaped ) 1618 throws IOException 1619 { 1620 1621 if ( preserveSpace ) { 1622 // Preserving spaces: the text must print exactly as it is, 1623 // without breaking when spaces appear in the text and without 1624 // consolidating spaces. If a line terminator is used, a line 1625 // break will occur. 1626 while ( length-- > 0 ) { 1627 char ch = chars[ start ]; 1628 ++start; 1629 if ( ch == '\n' || ch == '\r' || unescaped ) { 1630 _printer.printText( ch ); 1631 } 1632 else { 1633 printEscaped( ch ); 1634 } 1635 } 1636 } else { 1637 // Not preserving spaces: print one part at a time, and 1638 // use spaces between parts to break them into different 1639 // lines. Spaces at beginning of line will be stripped 1640 // by printing mechanism. Line terminator is treated 1641 // no different than other text part. 
1642 while ( length-- > 0 ) { 1643 char ch = chars[ start ]; 1644 ++start; 1645 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) { 1646 _printer.printSpace(); 1647 } 1648 else if ( unescaped ) { 1649 _printer.printText( ch ); 1650 } 1651 else { 1652 printEscaped( ch ); 1653 } 1654 } 1655 } 1656 } 1657 1658 1659 protected void printText( String text, boolean preserveSpace, boolean unescaped ) 1660 throws IOException 1661 { 1662 int index; 1663 char ch; 1664 1665 if ( preserveSpace ) { 1666 // Preserving spaces: the text must print exactly as it is, 1667 // without breaking when spaces appear in the text and without 1668 // consolidating spaces. If a line terminator is used, a line 1669 // break will occur. 1670 for ( index = 0 ; index < text.length() ; ++index ) { 1671 ch = text.charAt( index ); 1672 if ( ch == '\n' || ch == '\r' || unescaped ) 1673 _printer.printText( ch ); 1674 else 1675 printEscaped( ch ); 1676 } 1677 } else { 1678 // Not preserving spaces: print one part at a time, and 1679 // use spaces between parts to break them into different 1680 // lines. Spaces at beginning of line will be stripped 1681 // by printing mechanism. Line terminator is treated 1682 // no different than other text part. 1683 for ( index = 0 ; index < text.length() ; ++index ) { 1684 ch = text.charAt( index ); 1685 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) { 1686 _printer.printSpace(); 1687 } 1688 else if ( unescaped ) { 1689 _printer.printText( ch ); 1690 } 1691 else { 1692 printEscaped( ch ); 1693 } 1694 } 1695 } 1696 } 1697 1698 1699 /** 1700 * Print a document type public or system identifier URL. 1701 * Encapsulates the URL in double quotes, escapes non-printing 1702 * characters and print it equivalent to {@link #printText}. 
1703 * 1704 * @param url The document type url to print 1705 */ 1706 protected void printDoctypeURL( String url ) 1707 throws IOException 1708 { 1709 int i; 1710 1711 _printer.printText( '"' ); 1712 for( i = 0 ; i < url.length() ; ++i ) { 1713 if ( url.charAt( i ) == '"' || url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) { 1714 _printer.printText( '%' ); 1715 _printer.printText( Integer.toHexString( url.charAt( i ) ) ); 1716 } else 1717 _printer.printText( url.charAt( i ) ); 1718 } 1719 _printer.printText( '"' ); 1720 } 1721 1722 1723 protected void printEscaped( int ch ) 1724 throws IOException 1725 { 1726 String charRef; 1727 // If there is a suitable entity reference for this 1728 // character, print it. The list of available entity 1729 // references is almost but not identical between 1730 // XML and HTML. 1731 charRef = getEntityRef( ch ); 1732 if ( charRef != null ) { 1733 _printer.printText( '&' ); 1734 _printer.printText( charRef ); 1735 _printer.printText( ';' ); 1736 } else if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0x7F ) || 1737 ch == '\n' || ch == '\r' || ch == '\t' ) { 1738 // Non printables are below ASCII space but not tab or line 1739 // terminator, ASCII delete, or above a certain Unicode threshold. 1740 if (ch < 0x10000) { 1741 _printer.printText((char)ch ); 1742 } else { 1743 _printer.printText((char)(((ch-0x10000)>>10)+0xd800)); 1744 _printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00)); 1745 } 1746 } else { 1747 printHex(ch); 1748 } 1749 } 1750 1751 /** 1752 * Escapes chars 1753 */ 1754 final void printHex( int ch) throws IOException { 1755 _printer.printText( "&#x" ); 1756 _printer.printText(Integer.toHexString(ch)); 1757 _printer.printText( ';' ); 1758 1759 } 1760 1761 1762 /** 1763 * Escapes a string so it may be printed as text content or attribute 1764 * value. Non printable characters are escaped using character references. 
1765 * Where the format specifies a deault entity reference, that reference 1766 * is used (e.g. <tt>&lt;</tt>). 1767 * 1768 * @param source The string to escape 1769 */ 1770 protected void printEscaped( String source ) 1771 throws IOException 1772 { 1773 for ( int i = 0 ; i < source.length() ; ++i ) { 1774 int ch = source.charAt(i); 1775 if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) { 1776 int lowch = source.charAt(i+1); 1777 if ((lowch & 0xfc00) == 0xdc00) { 1778 ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00; 1779 i++; 1780 } 1781 } 1782 printEscaped(ch); 1783 } 1784 } 1785 1786 1787 //--------------------------------// 1788 // Element state handling methods // 1789 //--------------------------------// 1790 1791 1792 /** 1793 * Return the state of the current element. 1794 * 1795 * @return Current element state 1796 */ 1797 protected ElementState getElementState() 1798 { 1799 return _elementStates[ _elementStateCount ]; 1800 } 1801 1802 1803 /** 1804 * Enter a new element state for the specified element. 1805 * Tag name and space preserving is specified, element 1806 * state is initially empty. 1807 * 1808 * @return Current element state, or null 1809 */ 1810 protected ElementState enterElementState( String namespaceURI, String localName, 1811 String rawName, boolean preserveSpace ) 1812 { 1813 ElementState state; 1814 1815 if ( _elementStateCount + 1 == _elementStates.length ) { 1816 ElementState[] newStates; 1817 1818 // Need to create a larger array of states. This does not happen 1819 // often, unless the document is really deep. 
1820 newStates = new ElementState[ _elementStates.length + 10 ]; 1821 for ( int i = 0 ; i < _elementStates.length ; ++i ) 1822 newStates[ i ] = _elementStates[ i ]; 1823 for ( int i = _elementStates.length ; i < newStates.length ; ++i ) 1824 newStates[ i ] = new ElementState(); 1825 _elementStates = newStates; 1826 } 1827 1828 ++_elementStateCount; 1829 state = _elementStates[ _elementStateCount ]; 1830 state.namespaceURI = namespaceURI; 1831 state.localName = localName; 1832 state.rawName = rawName; 1833 state.preserveSpace = preserveSpace; 1834 state.empty = true; 1835 state.afterElement = false; 1836 state.afterComment = false; 1837 state.doCData = state.inCData = false; 1838 state.unescaped = false; 1839 state.prefixes = _prefixes; 1840 1841 _prefixes = null; 1842 return state; 1843 } 1844 1845 1846 /** 1847 * Leave the current element state and return to the 1848 * state of the parent element. If this was the root 1849 * element, return to the state of the document. 1850 * 1851 * @return Previous element state 1852 */ 1853 protected ElementState leaveElementState() 1854 { 1855 if ( _elementStateCount > 0 ) { 1856 /*Corrected by David Blondeau (blondeau@intalio.com)*/ 1857 _prefixes = null; 1858 //_prefixes = _elementStates[ _elementStateCount ].prefixes; 1859 -- _elementStateCount; 1860 return _elementStates[ _elementStateCount ]; 1861 } 1862 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "Internal", null); 1863 throw new IllegalStateException(msg); 1864 } 1865 1866 1867 /** 1868 * Returns true if in the state of the document. 1869 * Returns true before entering any element and after 1870 * leaving the root element. 1871 * 1872 * @return True if in the state of the document 1873 */ 1874 protected boolean isDocumentState() { 1875 return _elementStateCount == 0; 1876 } 1877 1878 /** Clears document state. 
**/ 1879 final void clearDocumentState() { 1880 _elementStateCount = 0; 1881 } 1882 1883 /** 1884 * Returns the namespace prefix for the specified URI. 1885 * If the URI has been mapped to a prefix, returns the 1886 * prefix, otherwise returns null. 1887 * 1888 * @param namespaceURI The namespace URI 1889 * @return The namespace prefix if known, or null 1890 */ 1891 protected String getPrefix( String namespaceURI ) 1892 { 1893 String prefix; 1894 1895 if ( _prefixes != null ) { 1896 prefix = _prefixes.get( namespaceURI ); 1897 if ( prefix != null ) 1898 return prefix; 1899 } 1900 if ( _elementStateCount == 0 ) { 1901 return null; 1902 } 1903 for ( int i = _elementStateCount ; i > 0 ; --i ) { 1904 if ( _elementStates[ i ].prefixes != null ) { 1905 prefix = _elementStates[ i ].prefixes.get( namespaceURI ); 1906 if ( prefix != null ) 1907 return prefix; 1908 } 1909 } 1910 return null; 1911 } 1912 1913 /** 1914 * The method modifies global DOM error object 1915 * 1916 * @param message 1917 * @param severity 1918 * @param type 1919 * @return a DOMError 1920 */ 1921 protected DOMError modifyDOMError(String message, short severity, String type, Node node){ 1922 fDOMError.reset(); 1923 fDOMError.fMessage = message; 1924 fDOMError.fType = type; 1925 fDOMError.fSeverity = severity; 1926 fDOMError.fLocator = new DOMLocatorImpl(-1, -1, -1, node, null); 1927 return fDOMError; 1928 1929 } 1930 1931 1932 protected void fatalError(String message) throws IOException{ 1933 if (fDOMErrorHandler != null) { 1934 modifyDOMError(message, DOMError.SEVERITY_FATAL_ERROR, null, fCurrentNode); 1935 fDOMErrorHandler.handleError(fDOMError); 1936 } 1937 else { 1938 throw new IOException(message); 1939 } 1940 } 1941 1942 /** 1943 * DOM level 3: 1944 * Check a node to determine if it contains unbound namespace prefixes. 
1945 * 1946 * @param node The node to check for unbound namespace prefices 1947 */ 1948 protected void checkUnboundNamespacePrefixedNode (Node node) throws IOException{ 1949 1950 } 1951 }