/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Sep 14, 2000:
//  Fixed comments to preserve whitespaces and add a line break
//  when indenting. Reported by Gervase Markham <gerv@gerv.net>
// Sep 14, 2000:
//  Fixed serializer to report IO exception directly, instead at
//  the end of document processing.
//  Reported by Patrick Higgins <phiggins@transzap.com>
// Sep 13, 2000:
//   CR in character data will print as &#0D;
// Aug 25, 2000:
//   Fixed processing instruction printing inside element content
//   to not escape content. Reported by Mikael Staldal
//   <d96-mst@d.kth.se>
// Aug 25, 2000:
//   Added ability to omit comments.
//   Contributed by Anupam Bagchi <abagchi@jtcsv.com>
// Aug 26, 2000:
//   Fixed bug in newline handling when preserving spaces.
//   Contributed by Mike Dusseault <mdusseault@home.com>
// Aug 29, 2000:
//   Fixed state.unescaped not being set to false when
//   entering element state.
//   Reported by Lowell Vaughn <lvaughn@agillion.com>


package com.sun.org.apache.xml.internal.serialize;


import com.sun.org.apache.xerces.internal.dom.DOMErrorImpl;
import com.sun.org.apache.xerces.internal.dom.DOMLocatorImpl;
import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
import com.sun.org.apache.xerces.internal.util.XMLChar;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import java.util.HashMap;
import java.util.Map;
import java.util.Vector;
import org.w3c.dom.DOMError;
import org.w3c.dom.DOMErrorHandler;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.ls.LSException;
import org.w3c.dom.ls.LSSerializerFilter;
import org.w3c.dom.traversal.NodeFilter;
import org.xml.sax.ContentHandler;
import org.xml.sax.DTDHandler;
import org.xml.sax.DocumentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.DeclHandler;
import org.xml.sax.ext.LexicalHandler;

/**
 * Base class for a serializer supporting both DOM and SAX pretty
 * serializing of XML/HTML/XHTML documents. Derived classes perform
 * the method-specific serializing, this class provides the common
 * serializing mechanisms.
 * <p>
 * The serializer must be initialized with the proper writer and
 * output format before it can be used by calling {@link #setOutputCharStream}
 * or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat}
 * for the output format.
 * <p>
 * The serializer can be reused any number of times, but cannot
 * be used concurrently by two threads.
 * <p>
 * If an output stream is used, the encoding is taken from the
 * output format (defaults to <tt>UTF-8</tt>). If a writer is
 * used, make sure the writer uses the same encoding (if applies)
 * as specified in the output format.
 * <p>
 * The serializer supports both DOM and SAX. DOM serializing is done
 * by calling {@link #serialize(Document)} and SAX serializing is done by firing
 * SAX events and using the serializer as a document handler.
 * This also applies to derived classes.
 * <p>
 * If an I/O exception occurs while serializing, the serializer
 * will not throw an exception directly, but only throw it
 * at the end of serializing (either DOM or SAX's {@link
 * org.xml.sax.DocumentHandler#endDocument}).
 * <p>
 * For elements that are not specified as whitespace preserving,
 * the serializer will potentially break long text lines at space
 * boundaries, indent lines, and serialize elements on separate
 * lines. Line terminators will be regarded as spaces, and
 * spaces at beginning of line will be stripped.
 * <p>
 * When indenting, the serializer is capable of detecting seemingly
 * element content, and serializing these elements indented on separate
 * lines. An element is serialized indented when it is the first or
 * last child of an element, or immediately following or preceding
 * another element.
 *
 *
 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
 * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
 * @author Elena Litani, IBM
 * @author Sunitha Reddy, Sun Microsystems
 * @see Serializer
 * @see org.w3c.dom.ls.LSSerializer
 *
 * @deprecated As of JDK 9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation
 * is replaced by that of Xalan. Main class
 * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced
 * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}.
 */
public abstract class BaseMarkupSerializer
    implements ContentHandler, DocumentHandler, LexicalHandler,
               DTDHandler, DeclHandler, DOMSerializer, Serializer
{

    // DOM L3 implementation

    /**
     * Bit mask of serializer features; tested against the
     * {@code DOMSerializerImpl} feature constants (e.g. {@code CDATA},
     * {@code ENTITIES}). The initializer narrows to -1, i.e. all bits set.
     */
    protected short features = 0xFFFFFFFF;
    /** Optional DOM error handler. */
    protected DOMErrorHandler fDOMErrorHandler;
    /** Reusable DOM error object. */
    protected final DOMErrorImpl fDOMError = new DOMErrorImpl();
    /** Optional filter consulted for each DOM node before it is serialized. */
    protected LSSerializerFilter fDOMFilter;

    /** Encoding information taken from the output format when preparing. */
    protected EncodingInfo _encodingInfo;


    /**
     * Holds array of all element states that have been entered.
     * The array is automatically resized. When leaving an element,
     * its state is not removed but reused when later returning
     * to the same nesting level.
     */
    private ElementState[] _elementStates;


    /**
     * The index of the next state to place in the array,
     * or one plus the index of the current state. When zero,
     * we are in no state.
     */
    private int _elementStateCount;


    /**
     * Vector holding comments and PIs that come before the root
     * element (even after it), see {@link #serializePreRoot}.
     */
    private Vector _preRoot;


    /**
     * If the document has been started (header serialized), this
     * flag is set to true so it's not started twice.
     */
    protected boolean _started;


    /**
     * True if the serializer has been prepared. This flag is set
     * to false when the serializer is reset prior to using it,
     * and to true after it has been prepared for usage.
     */
    private boolean _prepared;


    /**
     * Association between namespace URIs (keys) and prefixes (values).
     * Accumulated here prior to starting an element and placing this
     * list in the element state.
     */
    protected Map<String, String> _prefixes;


    /**
     * The public identifier of the document type, if known.
     */
    protected String _docTypePublicId;


    /**
     * The system identifier of the document type, if known.
     */
    protected String _docTypeSystemId;


    /**
     * The output format associated with this serializer. This will never
     * be a null reference. If no format was passed to the constructor,
     * the default one for this document type will be used. The format
     * object is never changed by the serializer.
     */
    protected OutputFormat _format;


    /**
     * The printer used for printing text parts.
     */
    protected Printer _printer;


    /**
     * True if indenting printer.
     */
    protected boolean _indenting;

    /** Temporary buffer to store character data */
    protected final StringBuffer fStrBuffer = new StringBuffer(40);

    /**
     * The underlying writer.
     */
    private Writer _writer;


    /**
     * The output stream.
     */
    private OutputStream _output;

    /** Current node that is being processed */
    protected Node fCurrentNode = null;



    //--------------------------------//
    // Constructor and initialization //
    //--------------------------------//


    /**
     * Protected constructor can only be used by derived class.
250 * Must initialize the serializer before serializing any document, 251 * by calling {@link #setOutputCharStream} or {@link #setOutputByteStream} 252 * first 253 */ 254 protected BaseMarkupSerializer( OutputFormat format ) 255 { 256 int i; 257 258 _elementStates = new ElementState[ 10 ]; 259 for ( i = 0 ; i < _elementStates.length ; ++i ) 260 _elementStates[ i ] = new ElementState(); 261 _format = format; 262 } 263 264 265 public DocumentHandler asDocumentHandler() 266 throws IOException 267 { 268 prepare(); 269 return this; 270 } 271 272 273 public ContentHandler asContentHandler() 274 throws IOException 275 { 276 prepare(); 277 return this; 278 } 279 280 281 public DOMSerializer asDOMSerializer() 282 throws IOException 283 { 284 prepare(); 285 return this; 286 } 287 288 289 public void setOutputByteStream( OutputStream output ) 290 { 291 if ( output == null ) { 292 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 293 "ArgumentIsNull", new Object[]{"output"}); 294 throw new NullPointerException(msg); 295 } 296 _output = output; 297 _writer = null; 298 reset(); 299 } 300 301 302 public void setOutputCharStream( Writer writer ) 303 { 304 if ( writer == null ) { 305 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 306 "ArgumentIsNull", new Object[]{"writer"}); 307 throw new NullPointerException(msg); 308 } 309 _writer = writer; 310 _output = null; 311 reset(); 312 } 313 314 315 public void setOutputFormat( OutputFormat format ) 316 { 317 if ( format == null ) { 318 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 319 "ArgumentIsNull", new Object[]{"format"}); 320 throw new NullPointerException(msg); 321 } 322 _format = format; 323 reset(); 324 } 325 326 327 public boolean reset() 328 { 329 if ( _elementStateCount > 1 ) { 330 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 331 "ResetInMiddle", null); 332 throw new 
IllegalStateException(msg); 333 } 334 _prepared = false; 335 fCurrentNode = null; 336 fStrBuffer.setLength(0); 337 return true; 338 } 339 340 protected void cleanup() { 341 fCurrentNode = null; 342 } 343 344 protected void prepare() 345 throws IOException 346 { 347 if ( _prepared ) 348 return; 349 350 if ( _writer == null && _output == null ) { 351 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 352 "NoWriterSupplied", null); 353 throw new IOException(msg); 354 } 355 // If the output stream has been set, use it to construct 356 // the writer. It is possible that the serializer has been 357 // reused with the same output stream and different encoding. 358 359 _encodingInfo = _format.getEncodingInfo(); 360 361 if ( _output != null ) { 362 _writer = _encodingInfo.getWriter(_output); 363 } 364 365 if ( _format.getIndenting() ) { 366 _indenting = true; 367 _printer = new IndentPrinter( _writer, _format ); 368 } else { 369 _indenting = false; 370 _printer = new Printer( _writer, _format ); 371 } 372 373 ElementState state; 374 375 _elementStateCount = 0; 376 state = _elementStates[ 0 ]; 377 state.namespaceURI = null; 378 state.localName = null; 379 state.rawName = null; 380 state.preserveSpace = _format.getPreserveSpace(); 381 state.empty = true; 382 state.afterElement = false; 383 state.afterComment = false; 384 state.doCData = state.inCData = false; 385 state.prefixes = null; 386 387 _docTypePublicId = _format.getDoctypePublic(); 388 _docTypeSystemId = _format.getDoctypeSystem(); 389 _started = false; 390 _prepared = true; 391 } 392 393 394 395 //----------------------------------// 396 // DOM document serializing methods // 397 //----------------------------------// 398 399 400 /** 401 * Serializes the DOM element using the previously specified 402 * writer and output format. Throws an exception only if 403 * an I/O exception occured while serializing. 
404 * 405 * @param elem The element to serialize 406 * @throws IOException An I/O exception occured while 407 * serializing 408 */ 409 public void serialize( Element elem ) 410 throws IOException 411 { 412 reset(); 413 prepare(); 414 serializeNode( elem ); 415 cleanup(); 416 _printer.flush(); 417 if ( _printer.getException() != null ) 418 throw _printer.getException(); 419 } 420 421 /** 422 * Serializes a node using the previously specified 423 * writer and output format. Throws an exception only if 424 * an I/O exception occured while serializing. 425 * 426 * @param node Node to serialize 427 * @throws IOException An I/O exception occured while serializing 428 */ 429 public void serialize( Node node ) throws IOException { 430 reset(); 431 prepare(); 432 serializeNode( node ); 433 //Print any PIs and Comments which appeared in 'node' 434 serializePreRoot(); 435 _printer.flush(); 436 if ( _printer.getException() != null ) 437 throw _printer.getException(); 438 } 439 440 /** 441 * Serializes the DOM document fragmnt using the previously specified 442 * writer and output format. Throws an exception only if 443 * an I/O exception occured while serializing. 444 * 445 * @param frag The document fragment to serialize 446 * @throws IOException An I/O exception occured while 447 * serializing 448 */ 449 public void serialize( DocumentFragment frag ) 450 throws IOException 451 { 452 reset(); 453 prepare(); 454 serializeNode( frag ); 455 cleanup(); 456 _printer.flush(); 457 if ( _printer.getException() != null ) 458 throw _printer.getException(); 459 } 460 461 462 /** 463 * Serializes the DOM document using the previously specified 464 * writer and output format. Throws an exception only if 465 * an I/O exception occured while serializing. 
466 * 467 * @param doc The document to serialize 468 * @throws IOException An I/O exception occured while 469 * serializing 470 */ 471 public void serialize( Document doc ) 472 throws IOException 473 { 474 reset(); 475 prepare(); 476 serializeNode( doc ); 477 serializePreRoot(); 478 cleanup(); 479 _printer.flush(); 480 if ( _printer.getException() != null ) 481 throw _printer.getException(); 482 } 483 484 485 //------------------------------------------// 486 // SAX document handler serializing methods // 487 //------------------------------------------// 488 489 490 public void startDocument() 491 throws SAXException 492 { 493 try { 494 prepare(); 495 } catch ( IOException except ) { 496 throw new SAXException( except.toString() ); 497 } 498 // Nothing to do here. All the magic happens in startDocument(String) 499 } 500 501 502 public void characters( char[] chars, int start, int length ) 503 throws SAXException 504 { 505 ElementState state; 506 507 try { 508 state = content(); 509 510 // Check if text should be print as CDATA section or unescaped 511 // based on elements listed in the output format (the element 512 // state) or whether we are inside a CDATA section or entity. 513 514 if ( state.inCData || state.doCData ) { 515 int saveIndent; 516 517 // Print a CDATA section. The text is not escaped, but ']]>' 518 // appearing in the code must be identified and dealt with. 519 // The contents of a text node is considered space preserving. 520 if ( ! 
state.inCData ) { 521 _printer.printText( "<![CDATA[" ); 522 state.inCData = true; 523 } 524 saveIndent = _printer.getNextIndent(); 525 _printer.setNextIndent( 0 ); 526 char ch; 527 final int end = start + length; 528 for ( int index = start ; index < end; ++index ) { 529 ch = chars[index]; 530 if ( ch == ']' && index + 2 < end && 531 chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) { 532 _printer.printText("]]]]><![CDATA[>"); 533 index +=2; 534 continue; 535 } 536 if (!XMLChar.isValid(ch)) { 537 // check if it is surrogate 538 if (++index < end) { 539 surrogates(ch, chars[index],true); 540 } 541 else { 542 fatalError("The character '"+ch+"' is an invalid XML character"); 543 } 544 continue; 545 } 546 if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0x7F ) || 547 ch == '\n' || ch == '\r' || ch == '\t' ) { 548 _printer.printText(ch); 549 } 550 else { 551 // The character is not printable -- split CDATA section 552 _printer.printText("]]>&#x"); 553 _printer.printText(Integer.toHexString(ch)); 554 _printer.printText(";<![CDATA["); 555 } 556 } 557 _printer.setNextIndent( saveIndent ); 558 559 } else { 560 561 int saveIndent; 562 563 if ( state.preserveSpace ) { 564 // If preserving space then hold of indentation so no 565 // excessive spaces are printed at line breaks, escape 566 // the text content without replacing spaces and print 567 // the text breaking only at line breaks. 
568 saveIndent = _printer.getNextIndent(); 569 _printer.setNextIndent( 0 ); 570 printText( chars, start, length, true, state.unescaped ); 571 _printer.setNextIndent( saveIndent ); 572 } else { 573 printText( chars, start, length, false, state.unescaped ); 574 } 575 } 576 } catch ( IOException except ) { 577 throw new SAXException( except ); 578 } 579 } 580 581 582 public void ignorableWhitespace( char[] chars, int start, int length ) 583 throws SAXException 584 { 585 int i; 586 587 try { 588 content(); 589 590 // Print ignorable whitespaces only when indenting, after 591 // all they are indentation. Cancel the indentation to 592 // not indent twice. 593 if ( _indenting ) { 594 _printer.setThisIndent( 0 ); 595 for ( i = start ; length-- > 0 ; ++i ) 596 _printer.printText( chars[ i ] ); 597 } 598 } catch ( IOException except ) { 599 throw new SAXException( except ); 600 } 601 } 602 603 604 public final void processingInstruction( String target, String code ) 605 throws SAXException 606 { 607 try { 608 processingInstructionIO( target, code ); 609 } catch ( IOException except ) { 610 throw new SAXException( except ); 611 } 612 } 613 614 public void processingInstructionIO( String target, String code ) 615 throws IOException 616 { 617 int index; 618 ElementState state; 619 620 state = content(); 621 622 // Create the processing instruction textual representation. 623 // Make sure we don't have '?>' inside either target or code. 624 index = target.indexOf( "?>" ); 625 if ( index >= 0 ) 626 fStrBuffer.append( "<?" ).append( target.substring( 0, index ) ); 627 else 628 fStrBuffer.append( "<?" ).append( target ); 629 if ( code != null ) { 630 fStrBuffer.append( ' ' ); 631 index = code.indexOf( "?>" ); 632 if ( index >= 0 ) 633 fStrBuffer.append( code.substring( 0, index ) ); 634 else 635 fStrBuffer.append( code ); 636 } 637 fStrBuffer.append( "?>" ); 638 639 // If before the root element (or after it), do not print 640 // the PI directly but place it in the pre-root vector. 
641 if ( isDocumentState() ) { 642 if ( _preRoot == null ) 643 _preRoot = new Vector(); 644 _preRoot.addElement( fStrBuffer.toString() ); 645 } else { 646 _printer.indent(); 647 printText( fStrBuffer.toString(), true, true ); 648 _printer.unindent(); 649 if ( _indenting ) 650 state.afterElement = true; 651 } 652 653 fStrBuffer.setLength(0); 654 } 655 656 657 public void comment( char[] chars, int start, int length ) 658 throws SAXException 659 { 660 try { 661 comment( new String( chars, start, length ) ); 662 } catch ( IOException except ) { 663 throw new SAXException( except ); 664 } 665 } 666 667 668 public void comment( String text ) 669 throws IOException 670 { 671 int index; 672 ElementState state; 673 674 if ( _format.getOmitComments() ) 675 return; 676 677 state = content(); 678 // Create the processing comment textual representation. 679 // Make sure we don't have '-->' inside the comment. 680 index = text.indexOf( "-->" ); 681 if ( index >= 0 ) 682 fStrBuffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" ); 683 else 684 fStrBuffer.append( "<!--" ).append( text ).append( "-->" ); 685 686 // If before the root element (or after it), do not print 687 // the comment directly but place it in the pre-root vector. 688 if ( isDocumentState() ) { 689 if ( _preRoot == null ) 690 _preRoot = new Vector(); 691 _preRoot.addElement( fStrBuffer.toString() ); 692 } else { 693 // Indent this element on a new line if the first 694 // content of the parent element or immediately 695 // following an element. 696 if ( _indenting && ! 
state.preserveSpace) 697 _printer.breakLine(); 698 _printer.indent(); 699 printText( fStrBuffer.toString(), true, true ); 700 _printer.unindent(); 701 if ( _indenting ) 702 state.afterElement = true; 703 } 704 705 fStrBuffer.setLength(0); 706 state.afterComment = true; 707 state.afterElement = false; 708 } 709 710 711 public void startCDATA() 712 { 713 ElementState state; 714 715 state = getElementState(); 716 state.doCData = true; 717 } 718 719 720 public void endCDATA() 721 { 722 ElementState state; 723 724 state = getElementState(); 725 state.doCData = false; 726 } 727 728 729 public void startNonEscaping() 730 { 731 ElementState state; 732 733 state = getElementState(); 734 state.unescaped = true; 735 } 736 737 738 public void endNonEscaping() 739 { 740 ElementState state; 741 742 state = getElementState(); 743 state.unescaped = false; 744 } 745 746 747 public void startPreserving() 748 { 749 ElementState state; 750 751 state = getElementState(); 752 state.preserveSpace = true; 753 } 754 755 756 public void endPreserving() 757 { 758 ElementState state; 759 760 state = getElementState(); 761 state.preserveSpace = false; 762 } 763 764 765 /** 766 * Called at the end of the document to wrap it up. 767 * Will flush the output stream and throw an exception 768 * if any I/O error occured while serializing. 769 * 770 * @throws SAXException An I/O exception occured during 771 * serializing 772 */ 773 public void endDocument() 774 throws SAXException 775 { 776 try { 777 // Print all the elements accumulated outside of 778 // the root element. 779 serializePreRoot(); 780 // Flush the output, this is necessary for fStrBuffered output. 781 _printer.flush(); 782 } catch ( IOException except ) { 783 throw new SAXException( except ); 784 } 785 } 786 787 788 public void startEntity( String name ) 789 { 790 // ??? 791 } 792 793 794 public void endEntity( String name ) 795 { 796 // ??? 
797 } 798 799 800 public void setDocumentLocator( Locator locator ) 801 { 802 // Nothing to do 803 } 804 805 806 //-----------------------------------------// 807 // SAX content handler serializing methods // 808 //-----------------------------------------// 809 810 811 public void skippedEntity ( String name ) 812 throws SAXException 813 { 814 try { 815 endCDATA(); 816 content(); 817 _printer.printText( '&' ); 818 _printer.printText( name ); 819 _printer.printText( ';' ); 820 } catch ( IOException except ) { 821 throw new SAXException( except ); 822 } 823 } 824 825 826 public void startPrefixMapping( String prefix, String uri ) 827 throws SAXException 828 { 829 if ( _prefixes == null ) 830 _prefixes = new HashMap<>(); 831 _prefixes.put( uri, prefix == null ? "" : prefix ); 832 } 833 834 835 public void endPrefixMapping( String prefix ) 836 throws SAXException 837 { 838 } 839 840 841 //------------------------------------------// 842 // SAX DTD/Decl handler serializing methods // 843 //------------------------------------------// 844 845 846 public final void startDTD( String name, String publicId, String systemId ) 847 throws SAXException 848 { 849 try { 850 _printer.enterDTD(); 851 _docTypePublicId = publicId; 852 _docTypeSystemId = systemId; 853 854 } catch ( IOException except ) { 855 throw new SAXException( except ); 856 } 857 } 858 859 860 public void endDTD() 861 { 862 // Nothing to do here, all the magic occurs in startDocument(String). 
863 } 864 865 866 public void elementDecl( String name, String model ) 867 throws SAXException 868 { 869 try { 870 _printer.enterDTD(); 871 _printer.printText( "<!ELEMENT " ); 872 _printer.printText( name ); 873 _printer.printText( ' ' ); 874 _printer.printText( model ); 875 _printer.printText( '>' ); 876 if ( _indenting ) 877 _printer.breakLine(); 878 } catch ( IOException except ) { 879 throw new SAXException( except ); 880 } 881 } 882 883 884 public void attributeDecl( String eName, String aName, String type, 885 String valueDefault, String value ) 886 throws SAXException 887 { 888 try { 889 _printer.enterDTD(); 890 _printer.printText( "<!ATTLIST " ); 891 _printer.printText( eName ); 892 _printer.printText( ' ' ); 893 _printer.printText( aName ); 894 _printer.printText( ' ' ); 895 _printer.printText( type ); 896 if ( valueDefault != null ) { 897 _printer.printText( ' ' ); 898 _printer.printText( valueDefault ); 899 } 900 if ( value != null ) { 901 _printer.printText( " \"" ); 902 printEscaped( value ); 903 _printer.printText( '"' ); 904 } 905 _printer.printText( '>' ); 906 if ( _indenting ) 907 _printer.breakLine(); 908 } catch ( IOException except ) { 909 throw new SAXException( except ); 910 } 911 } 912 913 914 public void internalEntityDecl( String name, String value ) 915 throws SAXException 916 { 917 try { 918 _printer.enterDTD(); 919 _printer.printText( "<!ENTITY " ); 920 _printer.printText( name ); 921 _printer.printText( " \"" ); 922 printEscaped( value ); 923 _printer.printText( "\">" ); 924 if ( _indenting ) 925 _printer.breakLine(); 926 } catch ( IOException except ) { 927 throw new SAXException( except ); 928 } 929 } 930 931 932 public void externalEntityDecl( String name, String publicId, String systemId ) 933 throws SAXException 934 { 935 try { 936 _printer.enterDTD(); 937 unparsedEntityDecl( name, publicId, systemId, null ); 938 } catch ( IOException except ) { 939 throw new SAXException( except ); 940 } 941 } 942 943 944 public void 
unparsedEntityDecl( String name, String publicId, 945 String systemId, String notationName ) 946 throws SAXException 947 { 948 try { 949 _printer.enterDTD(); 950 if ( publicId == null ) { 951 _printer.printText( "<!ENTITY " ); 952 _printer.printText( name ); 953 _printer.printText( " SYSTEM " ); 954 printDoctypeURL( systemId ); 955 } else { 956 _printer.printText( "<!ENTITY " ); 957 _printer.printText( name ); 958 _printer.printText( " PUBLIC " ); 959 printDoctypeURL( publicId ); 960 _printer.printText( ' ' ); 961 printDoctypeURL( systemId ); 962 } 963 if ( notationName != null ) { 964 _printer.printText( " NDATA " ); 965 _printer.printText( notationName ); 966 } 967 _printer.printText( '>' ); 968 if ( _indenting ) 969 _printer.breakLine(); 970 } catch ( IOException except ) { 971 throw new SAXException( except ); 972 } 973 } 974 975 976 public void notationDecl( String name, String publicId, String systemId ) 977 throws SAXException 978 { 979 try { 980 _printer.enterDTD(); 981 if ( publicId != null ) { 982 _printer.printText( "<!NOTATION " ); 983 _printer.printText( name ); 984 _printer.printText( " PUBLIC " ); 985 printDoctypeURL( publicId ); 986 if ( systemId != null ) { 987 _printer.printText( ' ' ); 988 printDoctypeURL( systemId ); 989 } 990 } else { 991 _printer.printText( "<!NOTATION " ); 992 _printer.printText( name ); 993 _printer.printText( " SYSTEM " ); 994 printDoctypeURL( systemId ); 995 } 996 _printer.printText( '>' ); 997 if ( _indenting ) 998 _printer.breakLine(); 999 } catch ( IOException except ) { 1000 throw new SAXException( except ); 1001 } 1002 } 1003 1004 1005 //------------------------------------------// 1006 // Generic node serializing methods methods // 1007 //------------------------------------------// 1008 1009 1010 /** 1011 * Serialize the DOM node. This method is shared across XML, HTML and XHTML 1012 * serializers and the differences are masked out in a separate {@link 1013 * #serializeElement}. 
     *
     * @param node The node to serialize
     * @see #serializeElement
     * @throws IOException An I/O exception occurred while
     *   serializing
     */
    protected void serializeNode( Node node )
        throws IOException
    {
        // Remember the node being processed; cleared again by cleanup().
        fCurrentNode = node;

        // Based on the node type call the suitable SAX handler.
        // Only comments entities and documents which are not
        // handled by SAX are serialized directly.
        switch ( node.getNodeType() ) {
        case Node.TEXT_NODE : {
            String text;

            text = node.getNodeValue();
            if ( text != null ) {
                // When a filter is installed and asks to see text nodes,
                // its verdict alone decides whether the text is written.
                if (fDOMFilter !=null &&
                    (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_TEXT)!= 0) {
                    short code = fDOMFilter.acceptNode(node);
                    switch (code) {
                    case NodeFilter.FILTER_REJECT:
                    case NodeFilter.FILTER_SKIP: {
                        break;
                    }
                    default: {
                        characters(text);
                    }
                    }
                }
                // Otherwise drop whitespace-only text when indenting,
                // unless the current element preserves space.
                else if ( !_indenting || getElementState().preserveSpace
                          || (text.replace('\n',' ').trim().length() != 0))
                    characters( text );

            }
            break;
        }

        case Node.CDATA_SECTION_NODE : {
            String text = node.getNodeValue();
            if ((features & DOMSerializerImpl.CDATA) != 0) {
                if (text != null) {
                    if (fDOMFilter != null
                        && (fDOMFilter.getWhatToShow()
                            & NodeFilter.SHOW_CDATA_SECTION)
                            != 0) {
                        short code = fDOMFilter.acceptNode(node);
                        switch (code) {
                            case NodeFilter.FILTER_REJECT :
                            case NodeFilter.FILTER_SKIP :
                                {
                                    // skip the CDATA node
                                    return;
                                }
                            default :
                                {
                                    //fall through..
                                }
                        }
                    }
                    startCDATA();
                    characters(text);
                    endCDATA();
                }
            } else {
                // transform into a text node
                // NOTE(review): text is not null-checked on this path,
                // unlike the CDATA-enabled branch above -- confirm intent.
                characters(text);
            }
            break;
        }
        case Node.COMMENT_NODE : {
            String text;

            if ( ! _format.getOmitComments() ) {
                text = node.getNodeValue();
                if ( text != null ) {

                    if (fDOMFilter !=null &&
                          (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_COMMENT)!= 0) {
                          short code = fDOMFilter.acceptNode(node);
                          switch (code) {
                          case NodeFilter.FILTER_REJECT:
                          case NodeFilter.FILTER_SKIP: {
                              // skip the comment node
                              return;
                          }
                          default: {
                               // fall through
                          }
                          }
                    }
                    comment( text );
                }
            }
            break;
        }

        case Node.ENTITY_REFERENCE_NODE : {
            Node         child;

            endCDATA();
            content();

            // Write the reference itself when the ENTITIES feature is on
            // or the reference has no children; otherwise write its
            // children in place of the reference.
            if (((features & DOMSerializerImpl.ENTITIES) != 0)
                || (node.getFirstChild() == null)) {
                if (fDOMFilter !=null &&
                      (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE)!= 0) {
                      short code = fDOMFilter.acceptNode(node);
                      switch (code) {
                        case NodeFilter.FILTER_REJECT:{
                            return; // remove the node
                          }
                          case NodeFilter.FILTER_SKIP: {
                              // Skip the reference but still serialize
                              // its children.
                              child = node.getFirstChild();
                              while ( child != null ) {
                                  serializeNode( child );
                                  child = child.getNextSibling();
                              }
                              return;
                          }

                          default: {
                               // fall through
                          }
                      }
                  }
                checkUnboundNamespacePrefixedNode(node);

                _printer.printText("&");
                _printer.printText(node.getNodeName());
                _printer.printText(";");
            }
            else {
                child = node.getFirstChild();
                while ( child != null ) {
                    serializeNode( child );
                    child = child.getNextSibling();
                }
            }

            break;
        }

        case Node.PROCESSING_INSTRUCTION_NODE : {

            if (fDOMFilter !=null &&
                  (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_PROCESSING_INSTRUCTION)!= 0) {
                  short code = fDOMFilter.acceptNode(node);
                  switch (code) {
                  case NodeFilter.FILTER_REJECT:
                  case NodeFilter.FILTER_SKIP: {
                      return;  // skip this node
                  }
                  default: { // fall through
                  }
                  }
            }
            processingInstructionIO( node.getNodeName(), node.getNodeValue() );
            break;
        }
        case Node.ELEMENT_NODE :  {

            if (fDOMFilter !=null &&
                  (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ELEMENT)!= 0) {
                  short code = fDOMFilter.acceptNode(node);
                  switch (code) {
                  case NodeFilter.FILTER_REJECT: {
                      // Reject drops the element together with its subtree.
                      return;
                  }
                  case NodeFilter.FILTER_SKIP: {
                      // Skip drops only the element; children are kept.
                      Node child = node.getFirstChild();
                      while ( child != null ) {
                          serializeNode( child );
                          child = child.getNextSibling();
                      }
                      return;  // skip this node
                  }

                  default: { // fall through
                  }
                  }
            }
            serializeElement( (Element) node );
            break;
        }
        case Node.DOCUMENT_NODE : {
            DocumentType      docType;

            serializeDocument();

            // If there is a document type, use the SAX events to
            // serialize it.
            docType = ( (Document) node ).getDoctype();
            if (docType != null) {
                // DOM Level 2 (or higher)
                try {
                    String internal;

                    _printer.enterDTD();
                    _docTypePublicId = docType.getPublicId();
                    _docTypeSystemId = docType.getSystemId();
                    internal = docType.getInternalSubset();
                    if ( internal != null && internal.length() > 0 )
                        _printer.printText( internal );
                    endDTD();
                } catch (Exception e) {
                    // Any failure above is swallowed: re-enter the DTD
                    // with no public/system identifiers instead.
                    _printer.enterDTD();
                    _docTypePublicId = null;
                    _docTypeSystemId = null;
                    endDTD();
                }
                serializeDTD(docType.getName());

            }
            _started = true;

            // !! Fall through
        }
        case Node.DOCUMENT_FRAGMENT_NODE : {
            Node         child;

            // By definition this will happen if the node is a document,
            // document fragment, etc. Just serialize its contents. It will
            // work well for other nodes that we do not know how to serialize.
            child = node.getFirstChild();
            while ( child != null ) {
                serializeNode( child );
                child = child.getNextSibling();
            }
            break;
        }

        default:
            break;
        }
    }


    /* Serializes XML Declaration, according to 'xml-declaration' property.
     * The declaration is written only if the document has not been started
     * and the output format does not ask to omit it; queued pre-root
     * content is serialized in every case.
     */
    protected void serializeDocument()throws IOException {
        int    i;

        // The returned DTD text is not used in this method.
        String dtd = _printer.leaveDTD();
        if (! _started) {

            if (! _format.getOmitXMLDeclaration()) {
                StringBuffer    buffer;

                // Serialize the document declaration appearing at the head
                // of every XML document (unless asked not to).
                buffer = new StringBuffer( "<?xml version=\"" );
                if (_format.getVersion() != null)
                    buffer.append( _format.getVersion() );
                else
                    buffer.append( "1.0" );
                buffer.append( '"' );
                String format_encoding =  _format.getEncoding();
                if (format_encoding != null) {
                    buffer.append( " encoding=\"" );
                    buffer.append( format_encoding );
                    buffer.append( '"' );
                }
                // standalone="yes" is emitted only when no document type
                // identifiers are known.
                if (_format.getStandalone() && _docTypeSystemId == null &&
                    _docTypePublicId == null)
                    buffer.append( " standalone=\"yes\"" );
                buffer.append( "?>" );
                _printer.printText( buffer );
                _printer.breakLine();
            }
        }

        // Always serialize these, even if not the first root element.
        serializePreRoot();

    }

    /* Serializes DTD, if present.
     */
    protected void serializeDTD(String name) throws IOException{

        String dtd = _printer.leaveDTD();
        if (! _format.getOmitDocumentType()) {
            if (_docTypeSystemId != null) {
                // System identifier must be specified to print DOCTYPE.
                // If public identifier is specified print 'PUBLIC
                // <public> <system>', if not, print 'SYSTEM <system>'.
1305 _printer.printText( "<!DOCTYPE " ); 1306 _printer.printText( name ); 1307 if (_docTypePublicId != null) { 1308 _printer.printText( " PUBLIC " ); 1309 printDoctypeURL( _docTypePublicId ); 1310 if (_indenting) { 1311 _printer.breakLine(); 1312 for (int i = 0 ; i < 18 + name.length() ; ++i) 1313 _printer.printText( " " ); 1314 } else 1315 _printer.printText( " " ); 1316 printDoctypeURL( _docTypeSystemId ); 1317 } else { 1318 _printer.printText( " SYSTEM " ); 1319 printDoctypeURL( _docTypeSystemId ); 1320 } 1321 1322 // If we accumulated any DTD contents while printing. 1323 // this would be the place to print it. 1324 if (dtd != null && dtd.length() > 0) { 1325 _printer.printText( " [" ); 1326 printText( dtd, true, true ); 1327 _printer.printText( ']' ); 1328 } 1329 1330 _printer.printText( ">" ); 1331 _printer.breakLine(); 1332 } else if (dtd != null && dtd.length() > 0) { 1333 _printer.printText( "<!DOCTYPE " ); 1334 _printer.printText( name ); 1335 _printer.printText( " [" ); 1336 printText( dtd, true, true ); 1337 _printer.printText( "]>" ); 1338 _printer.breakLine(); 1339 } 1340 } 1341 } 1342 1343 1344 /** 1345 * Must be called by a method about to print any type of content. 1346 * If the element was just opened, the opening tag is closed and 1347 * will be matched to a closing tag. Returns the current element 1348 * state with <tt>empty</tt> and <tt>afterElement</tt> set to false. 1349 * 1350 * @return The current element state 1351 * @throws IOException An I/O exception occurred while 1352 * serializing 1353 */ 1354 protected ElementState content() 1355 throws IOException 1356 { 1357 ElementState state; 1358 1359 state = getElementState(); 1360 if ( ! isDocumentState() ) { 1361 // Need to close CData section first 1362 if ( state.inCData && ! 
state.doCData ) { 1363 _printer.printText( "]]>" ); 1364 state.inCData = false; 1365 } 1366 // If this is the first content in the element, 1367 // change the state to not-empty and close the 1368 // opening element tag. 1369 if ( state.empty ) { 1370 _printer.printText( '>' ); 1371 state.empty = false; 1372 } 1373 // Except for one content type, all of them 1374 // are not last element. That one content 1375 // type will take care of itself. 1376 state.afterElement = false; 1377 // Except for one content type, all of them 1378 // are not last comment. That one content 1379 // type will take care of itself. 1380 state.afterComment = false; 1381 } 1382 return state; 1383 } 1384 1385 1386 /** 1387 * Called to print the text contents in the prevailing element format. 1388 * Since this method is capable of printing text as CDATA, it is used 1389 * for that purpose as well. White space handling is determined by the 1390 * current element state. In addition, the output format can dictate 1391 * whether the text is printed as CDATA or unescaped. 1392 * 1393 * @param text The text to print 1394 * @throws IOException An I/O exception occured while 1395 * serializing 1396 */ 1397 protected void characters( String text ) 1398 throws IOException 1399 { 1400 ElementState state; 1401 1402 state = content(); 1403 // Check if text should be print as CDATA section or unescaped 1404 // based on elements listed in the output format (the element 1405 // state) or whether we are inside a CDATA section or entity. 1406 1407 if ( state.inCData || state.doCData ) { 1408 // Print a CDATA section. The text is not escaped, but ']]>' 1409 // appearing in the code must be identified and dealt with. 1410 // The contents of a text node is considered space preserving. 1411 if ( ! 
state.inCData ) { 1412 _printer.printText("<![CDATA["); 1413 state.inCData = true; 1414 } 1415 int saveIndent = _printer.getNextIndent(); 1416 _printer.setNextIndent( 0 ); 1417 printCDATAText( text); 1418 _printer.setNextIndent( saveIndent ); 1419 1420 } else { 1421 1422 int saveIndent; 1423 1424 if ( state.preserveSpace ) { 1425 // If preserving space then hold of indentation so no 1426 // excessive spaces are printed at line breaks, escape 1427 // the text content without replacing spaces and print 1428 // the text breaking only at line breaks. 1429 saveIndent = _printer.getNextIndent(); 1430 _printer.setNextIndent( 0 ); 1431 printText( text, true, state.unescaped ); 1432 _printer.setNextIndent( saveIndent ); 1433 } else { 1434 printText( text, false, state.unescaped ); 1435 } 1436 } 1437 } 1438 1439 1440 /** 1441 * Returns the suitable entity reference for this character value, 1442 * or null if no such entity exists. Calling this method with <tt>'&'</tt> 1443 * will return <tt>"&amp;"</tt>. 1444 * 1445 * @param ch Character value 1446 * @return Character entity name, or null 1447 */ 1448 protected abstract String getEntityRef( int ch ); 1449 1450 1451 /** 1452 * Called to serializee the DOM element. The element is serialized based on 1453 * the serializer's method (XML, HTML, XHTML). 1454 * 1455 * @param elem The element to serialize 1456 * @throws IOException An I/O exception occured while 1457 * serializing 1458 */ 1459 protected abstract void serializeElement( Element elem ) 1460 throws IOException; 1461 1462 1463 /** 1464 * Comments and PIs cannot be serialized before the root element, 1465 * because the root element serializes the document type, which 1466 * generally comes first. Instead such PIs and comments are 1467 * accumulated inside a vector and serialized by calling this 1468 * method. Will be called when the root element is serialized 1469 * and when the document finished serializing. 
1470 * 1471 * @throws IOException An I/O exception occured while 1472 * serializing 1473 */ 1474 protected void serializePreRoot() 1475 throws IOException 1476 { 1477 int i; 1478 1479 if ( _preRoot != null ) { 1480 for ( i = 0 ; i < _preRoot.size() ; ++i ) { 1481 printText( (String) _preRoot.elementAt( i ), true, true ); 1482 if ( _indenting ) 1483 _printer.breakLine(); 1484 } 1485 _preRoot.removeAllElements(); 1486 } 1487 } 1488 1489 1490 //---------------------------------------------// 1491 // Text pretty printing and formatting methods // 1492 //---------------------------------------------// 1493 1494 protected void printCDATAText( String text ) throws IOException { 1495 int length = text.length(); 1496 char ch; 1497 1498 for ( int index = 0 ; index < length; ++index ) { 1499 ch = text.charAt( index ); 1500 if (ch == ']' 1501 && index + 2 < length 1502 && text.charAt(index + 1) == ']' 1503 && text.charAt(index + 2) == '>') { // check for ']]>' 1504 if (fDOMErrorHandler != null) { 1505 // REVISIT: this means that if DOM Error handler is not registered we don't report any 1506 // fatal errors and might serialize not wellformed document 1507 if ((features & DOMSerializerImpl.SPLITCDATA) == 0) { 1508 String msg = DOMMessageFormatter.formatMessage( 1509 DOMMessageFormatter.SERIALIZER_DOMAIN, 1510 "EndingCDATA", 1511 null); 1512 if ((features & DOMSerializerImpl.WELLFORMED) != 0) { 1513 // issue fatal error 1514 modifyDOMError(msg, DOMError.SEVERITY_FATAL_ERROR, "wf-invalid-character", fCurrentNode); 1515 fDOMErrorHandler.handleError(fDOMError); 1516 throw new LSException(LSException.SERIALIZE_ERR, msg); 1517 } 1518 // issue error 1519 modifyDOMError(msg, DOMError.SEVERITY_ERROR, "cdata-section-not-splitted", fCurrentNode); 1520 if (!fDOMErrorHandler.handleError(fDOMError)) { 1521 throw new LSException(LSException.SERIALIZE_ERR, msg); 1522 } 1523 } else { 1524 // issue warning 1525 String msg = 1526 DOMMessageFormatter.formatMessage( 1527 
DOMMessageFormatter.SERIALIZER_DOMAIN, 1528 "SplittingCDATA", 1529 null); 1530 modifyDOMError( 1531 msg, 1532 DOMError.SEVERITY_WARNING, 1533 null, fCurrentNode); 1534 fDOMErrorHandler.handleError(fDOMError); 1535 } 1536 } 1537 // split CDATA section 1538 _printer.printText("]]]]><![CDATA[>"); 1539 index += 2; 1540 continue; 1541 } 1542 1543 if (!XMLChar.isValid(ch)) { 1544 // check if it is surrogate 1545 if (++index <length) { 1546 surrogates(ch, text.charAt(index),true); 1547 } 1548 else { 1549 fatalError("The character '"+ch+"' is an invalid XML character"); 1550 } 1551 continue; 1552 } 1553 if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0x7F ) || 1554 ch == '\n' || ch == '\r' || ch == '\t' ) { 1555 _printer.printText(ch); 1556 } 1557 else { 1558 1559 // The character is not printable -- split CDATA section 1560 _printer.printText("]]>&#x"); 1561 _printer.printText(Integer.toHexString(ch)); 1562 _printer.printText(";<![CDATA["); 1563 } 1564 } 1565 } 1566 1567 1568 protected void surrogates(int high, int low, boolean inContent) throws IOException{ 1569 if (XMLChar.isHighSurrogate(high)) { 1570 if (!XMLChar.isLowSurrogate(low)) { 1571 //Invalid XML 1572 fatalError("The character '"+(char)low+"' is an invalid XML character"); 1573 } 1574 else { 1575 int supplemental = XMLChar.supplemental((char)high, (char)low); 1576 if (!XMLChar.isValid(supplemental)) { 1577 //Invalid XML 1578 fatalError("The character '"+(char)supplemental+"' is an invalid XML character"); 1579 } 1580 else { 1581 if (inContent && content().inCData) { 1582 _printer.printText("]]>&#x"); 1583 _printer.printText(Integer.toHexString(supplemental)); 1584 _printer.printText(";<![CDATA["); 1585 } 1586 else { 1587 printHex(supplemental); 1588 } 1589 } 1590 } 1591 } else { 1592 fatalError("The character '"+(char)high+"' is an invalid XML character"); 1593 } 1594 1595 } 1596 1597 /** 1598 * Called to print additional text with whitespace handling. 
1599 * If spaces are preserved, the text is printed as if by calling 1600 * {@link #printText(String,boolean,boolean)} with a call to {@link Printer#breakLine} 1601 * for each new line. If spaces are not preserved, the text is 1602 * broken at space boundaries if longer than the line width; 1603 * Multiple spaces are printed as such, but spaces at beginning 1604 * of line are removed. 1605 * 1606 * @param chars The text to print 1607 * @param start The start offset 1608 * @param length The number of characters 1609 * @param preserveSpace Space preserving flag 1610 * @param unescaped Print unescaped 1611 */ 1612 protected void printText( char[] chars, int start, int length, 1613 boolean preserveSpace, boolean unescaped ) 1614 throws IOException 1615 { 1616 1617 if ( preserveSpace ) { 1618 // Preserving spaces: the text must print exactly as it is, 1619 // without breaking when spaces appear in the text and without 1620 // consolidating spaces. If a line terminator is used, a line 1621 // break will occur. 1622 while ( length-- > 0 ) { 1623 char ch = chars[ start ]; 1624 ++start; 1625 if ( ch == '\n' || ch == '\r' || unescaped ) { 1626 _printer.printText( ch ); 1627 } 1628 else { 1629 printEscaped( ch ); 1630 } 1631 } 1632 } else { 1633 // Not preserving spaces: print one part at a time, and 1634 // use spaces between parts to break them into different 1635 // lines. Spaces at beginning of line will be stripped 1636 // by printing mechanism. Line terminator is treated 1637 // no different than other text part. 
1638 while ( length-- > 0 ) { 1639 char ch = chars[ start ]; 1640 ++start; 1641 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) { 1642 _printer.printSpace(); 1643 } 1644 else if ( unescaped ) { 1645 _printer.printText( ch ); 1646 } 1647 else { 1648 printEscaped( ch ); 1649 } 1650 } 1651 } 1652 } 1653 1654 1655 protected void printText( String text, boolean preserveSpace, boolean unescaped ) 1656 throws IOException 1657 { 1658 int index; 1659 char ch; 1660 1661 if ( preserveSpace ) { 1662 // Preserving spaces: the text must print exactly as it is, 1663 // without breaking when spaces appear in the text and without 1664 // consolidating spaces. If a line terminator is used, a line 1665 // break will occur. 1666 for ( index = 0 ; index < text.length() ; ++index ) { 1667 ch = text.charAt( index ); 1668 if ( ch == '\n' || ch == '\r' || unescaped ) 1669 _printer.printText( ch ); 1670 else 1671 printEscaped( ch ); 1672 } 1673 } else { 1674 // Not preserving spaces: print one part at a time, and 1675 // use spaces between parts to break them into different 1676 // lines. Spaces at beginning of line will be stripped 1677 // by printing mechanism. Line terminator is treated 1678 // no different than other text part. 1679 for ( index = 0 ; index < text.length() ; ++index ) { 1680 ch = text.charAt( index ); 1681 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) { 1682 _printer.printSpace(); 1683 } 1684 else if ( unescaped ) { 1685 _printer.printText( ch ); 1686 } 1687 else { 1688 printEscaped( ch ); 1689 } 1690 } 1691 } 1692 } 1693 1694 1695 /** 1696 * Print a document type public or system identifier URL. 1697 * Encapsulates the URL in double quotes, escapes non-printing 1698 * characters and print it equivalent to {@link #printText}. 
1699 * 1700 * @param url The document type url to print 1701 */ 1702 protected void printDoctypeURL( String url ) 1703 throws IOException 1704 { 1705 int i; 1706 1707 _printer.printText( '"' ); 1708 for( i = 0 ; i < url.length() ; ++i ) { 1709 if ( url.charAt( i ) == '"' || url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) { 1710 _printer.printText( '%' ); 1711 _printer.printText( Integer.toHexString( url.charAt( i ) ) ); 1712 } else 1713 _printer.printText( url.charAt( i ) ); 1714 } 1715 _printer.printText( '"' ); 1716 } 1717 1718 1719 protected void printEscaped( int ch ) 1720 throws IOException 1721 { 1722 String charRef; 1723 // If there is a suitable entity reference for this 1724 // character, print it. The list of available entity 1725 // references is almost but not identical between 1726 // XML and HTML. 1727 charRef = getEntityRef( ch ); 1728 if ( charRef != null ) { 1729 _printer.printText( '&' ); 1730 _printer.printText( charRef ); 1731 _printer.printText( ';' ); 1732 } else if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0x7F ) || 1733 ch == '\n' || ch == '\r' || ch == '\t' ) { 1734 // Non printables are below ASCII space but not tab or line 1735 // terminator, ASCII delete, or above a certain Unicode threshold. 1736 if (ch < 0x10000) { 1737 _printer.printText((char)ch ); 1738 } else { 1739 _printer.printText((char)(((ch-0x10000)>>10)+0xd800)); 1740 _printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00)); 1741 } 1742 } else { 1743 printHex(ch); 1744 } 1745 } 1746 1747 /** 1748 * Escapes chars 1749 */ 1750 final void printHex( int ch) throws IOException { 1751 _printer.printText( "&#x" ); 1752 _printer.printText(Integer.toHexString(ch)); 1753 _printer.printText( ';' ); 1754 1755 } 1756 1757 1758 /** 1759 * Escapes a string so it may be printed as text content or attribute 1760 * value. Non printable characters are escaped using character references. 
1761 * Where the format specifies a deault entity reference, that reference 1762 * is used (e.g. <tt>&lt;</tt>). 1763 * 1764 * @param source The string to escape 1765 */ 1766 protected void printEscaped( String source ) 1767 throws IOException 1768 { 1769 for ( int i = 0 ; i < source.length() ; ++i ) { 1770 int ch = source.charAt(i); 1771 if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) { 1772 int lowch = source.charAt(i+1); 1773 if ((lowch & 0xfc00) == 0xdc00) { 1774 ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00; 1775 i++; 1776 } 1777 } 1778 printEscaped(ch); 1779 } 1780 } 1781 1782 1783 //--------------------------------// 1784 // Element state handling methods // 1785 //--------------------------------// 1786 1787 1788 /** 1789 * Return the state of the current element. 1790 * 1791 * @return Current element state 1792 */ 1793 protected ElementState getElementState() 1794 { 1795 return _elementStates[ _elementStateCount ]; 1796 } 1797 1798 1799 /** 1800 * Enter a new element state for the specified element. 1801 * Tag name and space preserving is specified, element 1802 * state is initially empty. 1803 * 1804 * @return Current element state, or null 1805 */ 1806 protected ElementState enterElementState( String namespaceURI, String localName, 1807 String rawName, boolean preserveSpace ) 1808 { 1809 ElementState state; 1810 1811 if ( _elementStateCount + 1 == _elementStates.length ) { 1812 ElementState[] newStates; 1813 1814 // Need to create a larger array of states. This does not happen 1815 // often, unless the document is really deep. 
1816 newStates = new ElementState[ _elementStates.length + 10 ]; 1817 for ( int i = 0 ; i < _elementStates.length ; ++i ) 1818 newStates[ i ] = _elementStates[ i ]; 1819 for ( int i = _elementStates.length ; i < newStates.length ; ++i ) 1820 newStates[ i ] = new ElementState(); 1821 _elementStates = newStates; 1822 } 1823 1824 ++_elementStateCount; 1825 state = _elementStates[ _elementStateCount ]; 1826 state.namespaceURI = namespaceURI; 1827 state.localName = localName; 1828 state.rawName = rawName; 1829 state.preserveSpace = preserveSpace; 1830 state.empty = true; 1831 state.afterElement = false; 1832 state.afterComment = false; 1833 state.doCData = state.inCData = false; 1834 state.unescaped = false; 1835 state.prefixes = _prefixes; 1836 1837 _prefixes = null; 1838 return state; 1839 } 1840 1841 1842 /** 1843 * Leave the current element state and return to the 1844 * state of the parent element. If this was the root 1845 * element, return to the state of the document. 1846 * 1847 * @return Previous element state 1848 */ 1849 protected ElementState leaveElementState() 1850 { 1851 if ( _elementStateCount > 0 ) { 1852 /*Corrected by David Blondeau (blondeau@intalio.com)*/ 1853 _prefixes = null; 1854 //_prefixes = _elementStates[ _elementStateCount ].prefixes; 1855 -- _elementStateCount; 1856 return _elementStates[ _elementStateCount ]; 1857 } 1858 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "Internal", null); 1859 throw new IllegalStateException(msg); 1860 } 1861 1862 1863 /** 1864 * Returns true if in the state of the document. 1865 * Returns true before entering any element and after 1866 * leaving the root element. 1867 * 1868 * @return True if in the state of the document 1869 */ 1870 protected boolean isDocumentState() { 1871 return _elementStateCount == 0; 1872 } 1873 1874 /** Clears document state. 
**/
    final void clearDocumentState() {
        _elementStateCount = 0;
    }

    /**
     * Returns the namespace prefix for the specified URI.
     * If the URI has been mapped to a prefix, returns the
     * prefix, otherwise returns null. The pending prefixes are
     * consulted first, then the element states from the current
     * one outwards.
     *
     * @param namespaceURI The namespace URI
     * @return The namespace prefix if known, or null
     */
    protected String getPrefix( String namespaceURI )
    {
        if ( _prefixes != null ) {
            String pending = _prefixes.get( namespaceURI );
            if ( pending != null ) {
                return pending;
            }
        }
        // The loop body never runs in document state (count of zero).
        for ( int depth = _elementStateCount ; depth > 0 ; --depth ) {
            if ( _elementStates[ depth ].prefixes == null ) {
                continue;
            }
            String declared = (String) _elementStates[ depth ].prefixes.get( namespaceURI );
            if ( declared != null ) {
                return declared;
            }
        }
        return null;
    }

    /**
     * Resets the shared DOM error object and fills it with the given
     * details and a locator that refers to the node.
     *
     * @param message The error message
     * @param severity The error severity
     * @param type The error type
     * @param node The node the error is related to
     * @return a DOMError
     */
    protected DOMError modifyDOMError(String message, short severity, String type, Node node){
        fDOMError.reset();
        fDOMError.fLocator = new DOMLocatorImpl(-1, -1, -1, node, null);
        fDOMError.fSeverity = severity;
        fDOMError.fType = type;
        fDOMError.fMessage = message;
        return fDOMError;
    }


    /**
     * Reports a fatal error. With a registered DOM error handler the
     * error is passed to the handler; without one an
     * <tt>IOException</tt> carrying the message is thrown.
     *
     * @param message The error message
     * @throws IOException If no DOM error handler is registered
     */
    protected void fatalError(String message) throws IOException{
        if ( fDOMErrorHandler == null ) {
            throw new IOException(message);
        }
        modifyDOMError(message, DOMError.SEVERITY_FATAL_ERROR, null, fCurrentNode);
        fDOMErrorHandler.handleError(fDOMError);
    }

    /**
     * DOM level 3:
     * Check a node to determine if it contains unbound namespace prefixes.
     * This implementation is empty.
     *
     * @param node The node to check for unbound namespace prefixes
     * @throws IOException Declared by the signature; never thrown here
     */
    protected void checkUnboundNamespacePrefixedNode (Node node) throws IOException{
        // intentionally empty
    }
}