1 /* 2 * Copyright (c) 2015, 2017 Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 // Sep 14, 2000: 22 // Fixed comments to preserve whitespaces and add a line break 23 // when indenting. Reported by Gervase Markham <gerv@gerv.net> 24 // Sep 14, 2000: 25 // Fixed serializer to report IO exception directly, instead at 26 // the end of document processing. 27 // Reported by Patrick Higgins <phiggins@transzap.com> 28 // Sep 13, 2000: 29 // CR in character data will print as D; 30 // Aug 25, 2000: 31 // Fixed processing instruction printing inside element content 32 // to not escape content. Reported by Mikael Staldal 33 // <d96-mst@d.kth.se> 34 // Aug 25, 2000: 35 // Added ability to omit comments. 36 // Contributed by Anupam Bagchi <abagchi@jtcsv.com> 37 // Aug 26, 2000: 38 // Fixed bug in newline handling when preserving spaces. 39 // Contributed by Mike Dusseault <mdusseault@home.com> 40 // Aug 29, 2000: 41 // Fixed state.unescaped not being set to false when 42 // entering element state. 
43 // Reported by Lowell Vaughn <lvaughn@agillion.com> 44 45 46 package com.sun.org.apache.xml.internal.serialize; 47 48 49 import com.sun.org.apache.xerces.internal.dom.DOMErrorImpl; 50 import com.sun.org.apache.xerces.internal.dom.DOMLocatorImpl; 51 import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter; 52 import com.sun.org.apache.xerces.internal.util.XMLChar; 53 import java.io.IOException; 54 import java.io.OutputStream; 55 import java.io.Writer; 56 import java.util.HashMap; 57 import java.util.Map; 58 import java.util.Vector; 59 import org.w3c.dom.DOMError; 60 import org.w3c.dom.DOMErrorHandler; 61 import org.w3c.dom.Document; 62 import org.w3c.dom.DocumentFragment; 63 import org.w3c.dom.DocumentType; 64 import org.w3c.dom.Element; 65 import org.w3c.dom.Node; 66 import org.w3c.dom.ls.LSException; 67 import org.w3c.dom.ls.LSSerializerFilter; 68 import org.w3c.dom.traversal.NodeFilter; 69 import org.xml.sax.ContentHandler; 70 import org.xml.sax.DTDHandler; 71 import org.xml.sax.DocumentHandler; 72 import org.xml.sax.Locator; 73 import org.xml.sax.SAXException; 74 import org.xml.sax.ext.DeclHandler; 75 import org.xml.sax.ext.LexicalHandler; 76 77 /** 78 * Base class for a serializer supporting both DOM and SAX pretty 79 * serializing of XML/HTML/XHTML documents. Derives classes perform 80 * the method-specific serializing, this class provides the common 81 * serializing mechanisms. 82 * <p> 83 * The serializer must be initialized with the proper writer and 84 * output format before it can be used by calling {@link #setOutputCharStream} 85 * or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat} 86 * for the output format. 87 * <p> 88 * The serializer can be reused any number of times, but cannot 89 * be used concurrently by two threads. 90 * <p> 91 * If an output stream is used, the encoding is taken from the 92 * output format (defaults to <tt>UTF-8</tt>). 
If a writer is 93 * used, make sure the writer uses the same encoding (if applicable) 94 * as specified in the output format. 95 * <p> 96 * The serializer supports both DOM and SAX. DOM serializing is done 97 * by calling {@link #serialize(Document)} and SAX serializing is done by firing 98 * SAX events and using the serializer as a document handler. 99 * This also applies to derived classes. 100 * <p> 101 * If an I/O exception occurs while serializing, the serializer 102 * will not throw an exception directly, but only throw it 103 * at the end of serializing (either DOM or SAX's {@link 104 * org.xml.sax.DocumentHandler#endDocument}). 105 * <p> 106 * For elements that are not specified as whitespace preserving, 107 * the serializer will potentially break long text lines at space 108 * boundaries, indent lines, and serialize elements on separate 109 * lines. Line terminators will be regarded as spaces, and 110 * spaces at beginning of line will be stripped. 111 * <p> 112 * When indenting, the serializer is capable of detecting seemingly 113 * element content, and serializing these elements indented on separate 114 * lines. An element is serialized indented when it is the first or 115 * last child of an element, or immediately following or preceding 116 * another element. 117 * 118 * 119 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> 120 * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a> 121 * @author Elena Litani, IBM 122 * @author Sunitha Reddy, Sun Microsystems 123 * @see Serializer 124 * @see org.w3c.dom.ls.LSSerializer 125 * 126 * @deprecated As of JDK 9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation 127 * is replaced by that of Xalan. Main class 128 * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced 129 * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}. 
*/
@Deprecated
public abstract class BaseMarkupSerializer
    implements ContentHandler, DocumentHandler, LexicalHandler,
               DTDHandler, DeclHandler, DOMSerializer, Serializer
{

    // DOM L3 implementation

    /**
     * Bit mask of serializer features. It is tested against the
     * {@code DOMSerializerImpl} feature constants (for example
     * {@code CDATA} and {@code ENTITIES}) while serializing nodes.
     * Every bit is set initially.
     */
    protected short features = 0xFFFFFFFF;
    protected DOMErrorHandler fDOMErrorHandler;
    protected final DOMErrorImpl fDOMError = new DOMErrorImpl();

    /**
     * Optional filter consulted by {@link #serializeNode} for the node
     * types selected by its {@code whatToShow} mask. May be null, in
     * which case no filtering takes place.
     */
    protected LSSerializerFilter fDOMFilter;

    /**
     * Encoding information taken from the output format each time the
     * serializer is prepared.
     */
    protected EncodingInfo _encodingInfo;


    /**
     * Holds array of all element states that have been entered.
     * The array is automatically resized. When leaving an element,
     * its state is not removed but reused when later returning
     * to the same nesting level.
     */
    private ElementState[] _elementStates;


    /**
     * The index of the next state to place in the array,
     * or one plus the index of the current state. When zero,
     * we are in no state.
     */
    private int _elementStateCount;


    /**
     * Vector holding comments and PIs that come before the root
     * element (even after it), see {@link #serializePreRoot}.
     * Created lazily; null until the first entry is queued.
     */
    private Vector _preRoot;


    /**
     * If the document has been started (header serialized), this
     * flag is set to true so it's not started twice.
     */
    protected boolean _started;


    /**
     * True if the serializer has been prepared. This flag is set
     * to false when the serializer is reset prior to using it,
     * and to true after it has been prepared for usage.
     */
    private boolean _prepared;


    /**
     * Association between namespace URIs (keys) and prefixes (values).
     * Accumulated here prior to starting an element and placing this
     * list in the element state.
     */
    protected Map<String, String> _prefixes;


    /**
     * The public identifier of the document type, if known.
     */
    protected String _docTypePublicId;


    /**
     * The system identifier of the document type, if known.
     */
    protected String _docTypeSystemId;


    /**
     * The output format associated with this serializer. This will never
     * be a null reference. If no format was passed to the constructor,
     * the default one for this document type will be used. The format
     * object is never changed by the serializer.
     * <p>
     * NOTE(review): the constructor of this class stores its argument
     * without a null check, so the non-null guarantee presumably relies
     * on the derived classes -- confirm.
     */
    protected OutputFormat _format;


    /**
     * The printer used for printing text parts.
     */
    protected Printer _printer;


    /**
     * True if indenting printer.
     */
    protected boolean _indenting;

    /** Temporary buffer to store character data */
    protected final StringBuffer fStrBuffer = new StringBuffer(40);

    /**
     * The underlying writer.
     */
    private Writer _writer;


    /**
     * The output stream.
     */
    private OutputStream _output;

    /** Current node that is being processed */
    protected Node fCurrentNode = null;



    //--------------------------------//
    // Constructor and initialization //
    //--------------------------------//


    /**
     * Protected constructor can only be used by derived class.
251 * Must initialize the serializer before serializing any document, 252 * by calling {@link #setOutputCharStream} or {@link #setOutputByteStream} 253 * first 254 */ 255 protected BaseMarkupSerializer( OutputFormat format ) 256 { 257 int i; 258 259 _elementStates = new ElementState[ 10 ]; 260 for ( i = 0 ; i < _elementStates.length ; ++i ) 261 _elementStates[ i ] = new ElementState(); 262 _format = format; 263 } 264 265 266 public DocumentHandler asDocumentHandler() 267 throws IOException 268 { 269 prepare(); 270 return this; 271 } 272 273 274 public ContentHandler asContentHandler() 275 throws IOException 276 { 277 prepare(); 278 return this; 279 } 280 281 282 public DOMSerializer asDOMSerializer() 283 throws IOException 284 { 285 prepare(); 286 return this; 287 } 288 289 290 public void setOutputByteStream( OutputStream output ) 291 { 292 if ( output == null ) { 293 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 294 "ArgumentIsNull", new Object[]{"output"}); 295 throw new NullPointerException(msg); 296 } 297 _output = output; 298 _writer = null; 299 reset(); 300 } 301 302 303 public void setOutputCharStream( Writer writer ) 304 { 305 if ( writer == null ) { 306 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 307 "ArgumentIsNull", new Object[]{"writer"}); 308 throw new NullPointerException(msg); 309 } 310 _writer = writer; 311 _output = null; 312 reset(); 313 } 314 315 316 public void setOutputFormat( OutputFormat format ) 317 { 318 if ( format == null ) { 319 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 320 "ArgumentIsNull", new Object[]{"format"}); 321 throw new NullPointerException(msg); 322 } 323 _format = format; 324 reset(); 325 } 326 327 328 public boolean reset() 329 { 330 if ( _elementStateCount > 1 ) { 331 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 332 "ResetInMiddle", null); 333 throw new 
IllegalStateException(msg); 334 } 335 _prepared = false; 336 fCurrentNode = null; 337 fStrBuffer.setLength(0); 338 return true; 339 } 340 341 protected void cleanup() { 342 fCurrentNode = null; 343 } 344 345 protected void prepare() 346 throws IOException 347 { 348 if ( _prepared ) 349 return; 350 351 if ( _writer == null && _output == null ) { 352 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 353 "NoWriterSupplied", null); 354 throw new IOException(msg); 355 } 356 // If the output stream has been set, use it to construct 357 // the writer. It is possible that the serializer has been 358 // reused with the same output stream and different encoding. 359 360 _encodingInfo = _format.getEncodingInfo(); 361 362 if ( _output != null ) { 363 _writer = _encodingInfo.getWriter(_output); 364 } 365 366 if ( _format.getIndenting() ) { 367 _indenting = true; 368 _printer = new IndentPrinter( _writer, _format ); 369 } else { 370 _indenting = false; 371 _printer = new Printer( _writer, _format ); 372 } 373 374 ElementState state; 375 376 _elementStateCount = 0; 377 state = _elementStates[ 0 ]; 378 state.namespaceURI = null; 379 state.localName = null; 380 state.rawName = null; 381 state.preserveSpace = _format.getPreserveSpace(); 382 state.empty = true; 383 state.afterElement = false; 384 state.afterComment = false; 385 state.doCData = state.inCData = false; 386 state.prefixes = null; 387 388 _docTypePublicId = _format.getDoctypePublic(); 389 _docTypeSystemId = _format.getDoctypeSystem(); 390 _started = false; 391 _prepared = true; 392 } 393 394 395 396 //----------------------------------// 397 // DOM document serializing methods // 398 //----------------------------------// 399 400 401 /** 402 * Serializes the DOM element using the previously specified 403 * writer and output format. Throws an exception only if 404 * an I/O exception occured while serializing. 
405 * 406 * @param elem The element to serialize 407 * @throws IOException An I/O exception occured while 408 * serializing 409 */ 410 public void serialize( Element elem ) 411 throws IOException 412 { 413 reset(); 414 prepare(); 415 serializeNode( elem ); 416 cleanup(); 417 _printer.flush(); 418 if ( _printer.getException() != null ) 419 throw _printer.getException(); 420 } 421 422 /** 423 * Serializes a node using the previously specified 424 * writer and output format. Throws an exception only if 425 * an I/O exception occured while serializing. 426 * 427 * @param node Node to serialize 428 * @throws IOException An I/O exception occured while serializing 429 */ 430 public void serialize( Node node ) throws IOException { 431 reset(); 432 prepare(); 433 serializeNode( node ); 434 //Print any PIs and Comments which appeared in 'node' 435 serializePreRoot(); 436 _printer.flush(); 437 if ( _printer.getException() != null ) 438 throw _printer.getException(); 439 } 440 441 /** 442 * Serializes the DOM document fragmnt using the previously specified 443 * writer and output format. Throws an exception only if 444 * an I/O exception occured while serializing. 445 * 446 * @param frag The document fragment to serialize 447 * @throws IOException An I/O exception occured while 448 * serializing 449 */ 450 public void serialize( DocumentFragment frag ) 451 throws IOException 452 { 453 reset(); 454 prepare(); 455 serializeNode( frag ); 456 cleanup(); 457 _printer.flush(); 458 if ( _printer.getException() != null ) 459 throw _printer.getException(); 460 } 461 462 463 /** 464 * Serializes the DOM document using the previously specified 465 * writer and output format. Throws an exception only if 466 * an I/O exception occured while serializing. 
467 * 468 * @param doc The document to serialize 469 * @throws IOException An I/O exception occured while 470 * serializing 471 */ 472 public void serialize( Document doc ) 473 throws IOException 474 { 475 reset(); 476 prepare(); 477 serializeNode( doc ); 478 serializePreRoot(); 479 cleanup(); 480 _printer.flush(); 481 if ( _printer.getException() != null ) 482 throw _printer.getException(); 483 } 484 485 486 //------------------------------------------// 487 // SAX document handler serializing methods // 488 //------------------------------------------// 489 490 491 public void startDocument() 492 throws SAXException 493 { 494 try { 495 prepare(); 496 } catch ( IOException except ) { 497 throw new SAXException( except.toString() ); 498 } 499 // Nothing to do here. All the magic happens in startDocument(String) 500 } 501 502 503 public void characters( char[] chars, int start, int length ) 504 throws SAXException 505 { 506 ElementState state; 507 508 try { 509 state = content(); 510 511 // Check if text should be print as CDATA section or unescaped 512 // based on elements listed in the output format (the element 513 // state) or whether we are inside a CDATA section or entity. 514 515 if ( state.inCData || state.doCData ) { 516 int saveIndent; 517 518 // Print a CDATA section. The text is not escaped, but ']]>' 519 // appearing in the code must be identified and dealt with. 520 // The contents of a text node is considered space preserving. 521 if ( ! 
state.inCData ) { 522 _printer.printText( "<![CDATA[" ); 523 state.inCData = true; 524 } 525 saveIndent = _printer.getNextIndent(); 526 _printer.setNextIndent( 0 ); 527 char ch; 528 final int end = start + length; 529 for ( int index = start ; index < end; ++index ) { 530 ch = chars[index]; 531 if ( ch == ']' && index + 2 < end && 532 chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) { 533 _printer.printText("]]]]><![CDATA[>"); 534 index +=2; 535 continue; 536 } 537 if (!XMLChar.isValid(ch)) { 538 // check if it is surrogate 539 if (++index < end) { 540 surrogates(ch, chars[index],true); 541 } 542 else { 543 fatalError("The character '"+ch+"' is an invalid XML character"); 544 } 545 continue; 546 } 547 if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0x7F ) || 548 ch == '\n' || ch == '\r' || ch == '\t' ) { 549 _printer.printText(ch); 550 } 551 else { 552 // The character is not printable -- split CDATA section 553 _printer.printText("]]>&#x"); 554 _printer.printText(Integer.toHexString(ch)); 555 _printer.printText(";<![CDATA["); 556 } 557 } 558 _printer.setNextIndent( saveIndent ); 559 560 } else { 561 562 int saveIndent; 563 564 if ( state.preserveSpace ) { 565 // If preserving space then hold of indentation so no 566 // excessive spaces are printed at line breaks, escape 567 // the text content without replacing spaces and print 568 // the text breaking only at line breaks. 
569 saveIndent = _printer.getNextIndent(); 570 _printer.setNextIndent( 0 ); 571 printText( chars, start, length, true, state.unescaped ); 572 _printer.setNextIndent( saveIndent ); 573 } else { 574 printText( chars, start, length, false, state.unescaped ); 575 } 576 } 577 } catch ( IOException except ) { 578 throw new SAXException( except ); 579 } 580 } 581 582 583 public void ignorableWhitespace( char[] chars, int start, int length ) 584 throws SAXException 585 { 586 int i; 587 588 try { 589 content(); 590 591 // Print ignorable whitespaces only when indenting, after 592 // all they are indentation. Cancel the indentation to 593 // not indent twice. 594 if ( _indenting ) { 595 _printer.setThisIndent( 0 ); 596 for ( i = start ; length-- > 0 ; ++i ) 597 _printer.printText( chars[ i ] ); 598 } 599 } catch ( IOException except ) { 600 throw new SAXException( except ); 601 } 602 } 603 604 605 public final void processingInstruction( String target, String code ) 606 throws SAXException 607 { 608 try { 609 processingInstructionIO( target, code ); 610 } catch ( IOException except ) { 611 throw new SAXException( except ); 612 } 613 } 614 615 public void processingInstructionIO( String target, String code ) 616 throws IOException 617 { 618 int index; 619 ElementState state; 620 621 state = content(); 622 623 // Create the processing instruction textual representation. 624 // Make sure we don't have '?>' inside either target or code. 625 index = target.indexOf( "?>" ); 626 if ( index >= 0 ) 627 fStrBuffer.append( "<?" ).append( target.substring( 0, index ) ); 628 else 629 fStrBuffer.append( "<?" ).append( target ); 630 if ( code != null ) { 631 fStrBuffer.append( ' ' ); 632 index = code.indexOf( "?>" ); 633 if ( index >= 0 ) 634 fStrBuffer.append( code.substring( 0, index ) ); 635 else 636 fStrBuffer.append( code ); 637 } 638 fStrBuffer.append( "?>" ); 639 640 // If before the root element (or after it), do not print 641 // the PI directly but place it in the pre-root vector. 
642 if ( isDocumentState() ) { 643 if ( _preRoot == null ) 644 _preRoot = new Vector(); 645 _preRoot.addElement( fStrBuffer.toString() ); 646 } else { 647 _printer.indent(); 648 printText( fStrBuffer.toString(), true, true ); 649 _printer.unindent(); 650 if ( _indenting ) 651 state.afterElement = true; 652 } 653 654 fStrBuffer.setLength(0); 655 } 656 657 658 public void comment( char[] chars, int start, int length ) 659 throws SAXException 660 { 661 try { 662 comment( new String( chars, start, length ) ); 663 } catch ( IOException except ) { 664 throw new SAXException( except ); 665 } 666 } 667 668 669 public void comment( String text ) 670 throws IOException 671 { 672 int index; 673 ElementState state; 674 675 if ( _format.getOmitComments() ) 676 return; 677 678 state = content(); 679 // Create the processing comment textual representation. 680 // Make sure we don't have '-->' inside the comment. 681 index = text.indexOf( "-->" ); 682 if ( index >= 0 ) 683 fStrBuffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" ); 684 else 685 fStrBuffer.append( "<!--" ).append( text ).append( "-->" ); 686 687 // If before the root element (or after it), do not print 688 // the comment directly but place it in the pre-root vector. 689 if ( isDocumentState() ) { 690 if ( _preRoot == null ) 691 _preRoot = new Vector(); 692 _preRoot.addElement( fStrBuffer.toString() ); 693 } else { 694 // Indent this element on a new line if the first 695 // content of the parent element or immediately 696 // following an element. 697 if ( _indenting && ! 
state.preserveSpace) 698 _printer.breakLine(); 699 _printer.indent(); 700 printText( fStrBuffer.toString(), true, true ); 701 _printer.unindent(); 702 if ( _indenting ) 703 state.afterElement = true; 704 } 705 706 fStrBuffer.setLength(0); 707 state.afterComment = true; 708 state.afterElement = false; 709 } 710 711 712 public void startCDATA() 713 { 714 ElementState state; 715 716 state = getElementState(); 717 state.doCData = true; 718 } 719 720 721 public void endCDATA() 722 { 723 ElementState state; 724 725 state = getElementState(); 726 state.doCData = false; 727 } 728 729 730 public void startNonEscaping() 731 { 732 ElementState state; 733 734 state = getElementState(); 735 state.unescaped = true; 736 } 737 738 739 public void endNonEscaping() 740 { 741 ElementState state; 742 743 state = getElementState(); 744 state.unescaped = false; 745 } 746 747 748 public void startPreserving() 749 { 750 ElementState state; 751 752 state = getElementState(); 753 state.preserveSpace = true; 754 } 755 756 757 public void endPreserving() 758 { 759 ElementState state; 760 761 state = getElementState(); 762 state.preserveSpace = false; 763 } 764 765 766 /** 767 * Called at the end of the document to wrap it up. 768 * Will flush the output stream and throw an exception 769 * if any I/O error occured while serializing. 770 * 771 * @throws SAXException An I/O exception occured during 772 * serializing 773 */ 774 public void endDocument() 775 throws SAXException 776 { 777 try { 778 // Print all the elements accumulated outside of 779 // the root element. 780 serializePreRoot(); 781 // Flush the output, this is necessary for fStrBuffered output. 782 _printer.flush(); 783 } catch ( IOException except ) { 784 throw new SAXException( except ); 785 } 786 } 787 788 789 public void startEntity( String name ) 790 { 791 // ??? 792 } 793 794 795 public void endEntity( String name ) 796 { 797 // ??? 
798 } 799 800 801 public void setDocumentLocator( Locator locator ) 802 { 803 // Nothing to do 804 } 805 806 807 //-----------------------------------------// 808 // SAX content handler serializing methods // 809 //-----------------------------------------// 810 811 812 public void skippedEntity ( String name ) 813 throws SAXException 814 { 815 try { 816 endCDATA(); 817 content(); 818 _printer.printText( '&' ); 819 _printer.printText( name ); 820 _printer.printText( ';' ); 821 } catch ( IOException except ) { 822 throw new SAXException( except ); 823 } 824 } 825 826 827 public void startPrefixMapping( String prefix, String uri ) 828 throws SAXException 829 { 830 if ( _prefixes == null ) 831 _prefixes = new HashMap<>(); 832 _prefixes.put( uri, prefix == null ? "" : prefix ); 833 } 834 835 836 public void endPrefixMapping( String prefix ) 837 throws SAXException 838 { 839 } 840 841 842 //------------------------------------------// 843 // SAX DTD/Decl handler serializing methods // 844 //------------------------------------------// 845 846 847 public final void startDTD( String name, String publicId, String systemId ) 848 throws SAXException 849 { 850 try { 851 _printer.enterDTD(); 852 _docTypePublicId = publicId; 853 _docTypeSystemId = systemId; 854 855 } catch ( IOException except ) { 856 throw new SAXException( except ); 857 } 858 } 859 860 861 public void endDTD() 862 { 863 // Nothing to do here, all the magic occurs in startDocument(String). 
864 } 865 866 867 public void elementDecl( String name, String model ) 868 throws SAXException 869 { 870 try { 871 _printer.enterDTD(); 872 _printer.printText( "<!ELEMENT " ); 873 _printer.printText( name ); 874 _printer.printText( ' ' ); 875 _printer.printText( model ); 876 _printer.printText( '>' ); 877 if ( _indenting ) 878 _printer.breakLine(); 879 } catch ( IOException except ) { 880 throw new SAXException( except ); 881 } 882 } 883 884 885 public void attributeDecl( String eName, String aName, String type, 886 String valueDefault, String value ) 887 throws SAXException 888 { 889 try { 890 _printer.enterDTD(); 891 _printer.printText( "<!ATTLIST " ); 892 _printer.printText( eName ); 893 _printer.printText( ' ' ); 894 _printer.printText( aName ); 895 _printer.printText( ' ' ); 896 _printer.printText( type ); 897 if ( valueDefault != null ) { 898 _printer.printText( ' ' ); 899 _printer.printText( valueDefault ); 900 } 901 if ( value != null ) { 902 _printer.printText( " \"" ); 903 printEscaped( value ); 904 _printer.printText( '"' ); 905 } 906 _printer.printText( '>' ); 907 if ( _indenting ) 908 _printer.breakLine(); 909 } catch ( IOException except ) { 910 throw new SAXException( except ); 911 } 912 } 913 914 915 public void internalEntityDecl( String name, String value ) 916 throws SAXException 917 { 918 try { 919 _printer.enterDTD(); 920 _printer.printText( "<!ENTITY " ); 921 _printer.printText( name ); 922 _printer.printText( " \"" ); 923 printEscaped( value ); 924 _printer.printText( "\">" ); 925 if ( _indenting ) 926 _printer.breakLine(); 927 } catch ( IOException except ) { 928 throw new SAXException( except ); 929 } 930 } 931 932 933 public void externalEntityDecl( String name, String publicId, String systemId ) 934 throws SAXException 935 { 936 try { 937 _printer.enterDTD(); 938 unparsedEntityDecl( name, publicId, systemId, null ); 939 } catch ( IOException except ) { 940 throw new SAXException( except ); 941 } 942 } 943 944 945 public void 
unparsedEntityDecl( String name, String publicId, 946 String systemId, String notationName ) 947 throws SAXException 948 { 949 try { 950 _printer.enterDTD(); 951 if ( publicId == null ) { 952 _printer.printText( "<!ENTITY " ); 953 _printer.printText( name ); 954 _printer.printText( " SYSTEM " ); 955 printDoctypeURL( systemId ); 956 } else { 957 _printer.printText( "<!ENTITY " ); 958 _printer.printText( name ); 959 _printer.printText( " PUBLIC " ); 960 printDoctypeURL( publicId ); 961 _printer.printText( ' ' ); 962 printDoctypeURL( systemId ); 963 } 964 if ( notationName != null ) { 965 _printer.printText( " NDATA " ); 966 _printer.printText( notationName ); 967 } 968 _printer.printText( '>' ); 969 if ( _indenting ) 970 _printer.breakLine(); 971 } catch ( IOException except ) { 972 throw new SAXException( except ); 973 } 974 } 975 976 977 public void notationDecl( String name, String publicId, String systemId ) 978 throws SAXException 979 { 980 try { 981 _printer.enterDTD(); 982 if ( publicId != null ) { 983 _printer.printText( "<!NOTATION " ); 984 _printer.printText( name ); 985 _printer.printText( " PUBLIC " ); 986 printDoctypeURL( publicId ); 987 if ( systemId != null ) { 988 _printer.printText( ' ' ); 989 printDoctypeURL( systemId ); 990 } 991 } else { 992 _printer.printText( "<!NOTATION " ); 993 _printer.printText( name ); 994 _printer.printText( " SYSTEM " ); 995 printDoctypeURL( systemId ); 996 } 997 _printer.printText( '>' ); 998 if ( _indenting ) 999 _printer.breakLine(); 1000 } catch ( IOException except ) { 1001 throw new SAXException( except ); 1002 } 1003 } 1004 1005 1006 //------------------------------------------// 1007 // Generic node serializing methods methods // 1008 //------------------------------------------// 1009 1010 1011 /** 1012 * Serialize the DOM node. This method is shared across XML, HTML and XHTML 1013 * serializers and the differences are masked out in a separate {@link 1014 * #serializeElement}. 
*
     * @param node The node to serialize
     * @see #serializeElement
     * @throws IOException An I/O exception occurred while
     *   serializing
     */
    protected void serializeNode( Node node )
        throws IOException
    {
        // Remember the node being processed.
        fCurrentNode = node;

        // Based on the node type call the suitable SAX handler.
        // Only comments entities and documents which are not
        // handled by SAX are serialized directly.
        switch ( node.getNodeType() ) {
        case Node.TEXT_NODE : {
            String text;

            text = node.getNodeValue();
            if ( text != null ) {
                // When a filter is installed and shows text nodes, it alone
                // decides whether the text is written.
                if (fDOMFilter !=null &&
                    (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_TEXT)!= 0) {
                    short code = fDOMFilter.acceptNode(node);
                    switch (code) {
                    case NodeFilter.FILTER_REJECT:
                    case NodeFilter.FILTER_SKIP: {
                        break;
                    }
                    default: {
                        characters(text);
                    }
                    }
                }
                // Otherwise whitespace-only text is dropped while indenting,
                // unless the current element preserves space.
                else if ( !_indenting || getElementState().preserveSpace
                          || (text.replace('\n',' ').trim().length() != 0))
                    characters( text );

            }
            break;
        }

        case Node.CDATA_SECTION_NODE : {
            String text = node.getNodeValue();
            // With the CDATA feature bit set the node is written as a CDATA
            // section; otherwise its value is written as ordinary text.
            if ((features & DOMSerializerImpl.CDATA) != 0) {
                if (text != null) {
                    if (fDOMFilter != null
                        && (fDOMFilter.getWhatToShow()
                            & NodeFilter.SHOW_CDATA_SECTION)
                            != 0) {
                        short code = fDOMFilter.acceptNode(node);
                        switch (code) {
                            case NodeFilter.FILTER_REJECT :
                            case NodeFilter.FILTER_SKIP :
                                {
                                    // skip the CDATA node
                                    return;
                                }
                            default :
                                {
                                    //fall through..
                                }
                        }
                    }
                    startCDATA();
                    characters(text);
                    endCDATA();
                }
            } else {
                // transform into a text node
                characters(text);
            }
            break;
        }
        case Node.COMMENT_NODE : {
            String text;

            if ( ! _format.getOmitComments() ) {
                text = node.getNodeValue();
                if ( text != null ) {

                    if (fDOMFilter !=null &&
                          (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_COMMENT)!= 0) {
                          short code = fDOMFilter.acceptNode(node);
                          switch (code) {
                          case NodeFilter.FILTER_REJECT:
                          case NodeFilter.FILTER_SKIP: {
                              // skip the comment node
                              return;
                          }
                          default: {
                               // fall through
                          }
                          }
                    }
                    comment( text );
                }
            }
            break;
        }

        case Node.ENTITY_REFERENCE_NODE : {
            Node child;

            endCDATA();
            content();

            // The reference itself is written when the ENTITIES feature bit
            // is set or the node has no children; otherwise the children are
            // serialized in its place.
            if (((features & DOMSerializerImpl.ENTITIES) != 0)
                || (node.getFirstChild() == null)) {
                if (fDOMFilter !=null &&
                      (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE)!= 0) {
                      short code = fDOMFilter.acceptNode(node);
                      switch (code) {
                        case NodeFilter.FILTER_REJECT:{
                            return; // remove the node
                          }
                          case NodeFilter.FILTER_SKIP: {
                              // Skip the reference but still serialize its children.
                              child = node.getFirstChild();
                              while ( child != null ) {
                                  serializeNode( child );
                                  child = child.getNextSibling();
                              }
                              return;
                          }

                          default: {
                               // fall through
                          }
                      }
                  }
                checkUnboundNamespacePrefixedNode(node);

                _printer.printText("&");
                _printer.printText(node.getNodeName());
                _printer.printText(";");
            }
            else {
                child = node.getFirstChild();
                while ( child != null ) {
                    serializeNode( child );
                    child = child.getNextSibling();
                }
            }

            break;
        }

        case Node.PROCESSING_INSTRUCTION_NODE : {

            if (fDOMFilter !=null &&
                  (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_PROCESSING_INSTRUCTION)!= 0) {
                  short code = fDOMFilter.acceptNode(node);
                  switch (code) {
                    case NodeFilter.FILTER_REJECT:
                    case NodeFilter.FILTER_SKIP: {
                          return;  // skip this node
                    }
                    default: { // fall through
                    }
                  }
            }
            processingInstructionIO( node.getNodeName(), node.getNodeValue() );
            break;
        }
        case Node.ELEMENT_NODE :  {

            if (fDOMFilter !=null &&
                  (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ELEMENT)!= 0) {
                  short code = fDOMFilter.acceptNode(node);
                  switch (code) {
                    case NodeFilter.FILTER_REJECT: {
                        return;
                    }
                    case NodeFilter.FILTER_SKIP: {
                        // Skip the element itself but still serialize its children.
                        Node child = node.getFirstChild();
                        while ( child != null ) {
                            serializeNode( child );
                            child = child.getNextSibling();
                        }
                        return;  // skip this node
                    }

                    default: { // fall through
                    }
                  }
            }
            serializeElement( (Element) node );
            break;
        }
        case Node.DOCUMENT_NODE : {
            DocumentType      docType;

            serializeDocument();

            // If there is a document type, use the SAX events to
            // serialize it.
            docType = ( (Document) node ).getDoctype();
            if (docType != null) {
                // DOM Level 2 (or higher)
                try {
                    String internal;

                    _printer.enterDTD();
                    _docTypePublicId = docType.getPublicId();
                    _docTypeSystemId = docType.getSystemId();
                    internal = docType.getInternalSubset();
                    if ( internal != null && internal.length() > 0 )
                        _printer.printText( internal );
                    endDTD();
                } catch (Exception e) {
                    // ignore
                    // On any failure the identifiers are cleared before the
                    // DTD is serialized below.
                    _printer.enterDTD();
                    _docTypePublicId = null;
                    _docTypeSystemId = null;
                    endDTD();
                }
                serializeDTD(docType.getName());

            }
            _started = true;

            // !! Fall through
        }
        case Node.DOCUMENT_FRAGMENT_NODE : {
            Node         child;

            // By definition this will happen if the node is a document,
            // document fragment, etc. Just serialize its contents. It will
            // work well for other nodes that we do not know how to serialize.
            child = node.getFirstChild();
            while ( child != null ) {
                serializeNode( child );
                child = child.getNextSibling();
            }
            break;
        }

        default:
            break;
        }
    }


    /* Serializes XML Declaration, according to 'xml-declaration' property.
     * The declaration is written only if the document has not been started
     * and the output format does not omit it; queued pre-root content is
     * written in every case.
     */
    protected void serializeDocument()throws IOException {
        int    i;

        // Called for its effect on the printer; the returned text is not used here.
        String dtd = _printer.leaveDTD();
        if (! _started) {

            if (! _format.getOmitXMLDeclaration()) {
                StringBuffer    buffer;

                // Serialize the document declaration appearing at the head
                // of every XML document (unless asked not to).
                buffer = new StringBuffer( "<?xml version=\"" );
                if (_format.getVersion() != null)
                    buffer.append( _format.getVersion() );
                else
                    buffer.append( "1.0" );
                buffer.append( '"' );
                String format_encoding =  _format.getEncoding();
                if (format_encoding != null) {
                    buffer.append( " encoding=\"" );
                    buffer.append( format_encoding );
                    buffer.append( '"' );
                }
                // standalone="yes" is only written when no document type
                // identifiers are known.
                if (_format.getStandalone() && _docTypeSystemId == null &&
                    _docTypePublicId == null)
                    buffer.append( " standalone=\"yes\"" );
                buffer.append( "?>" );
                _printer.printText( buffer );
                _printer.breakLine();
            }
        }

        // Always serialize these, even if not the first root element.
        serializePreRoot();

    }

    /* Serializes DTD, if present.
     */
    protected void serializeDTD(String name) throws IOException{

        String dtd = _printer.leaveDTD();
        if (! _format.getOmitDocumentType()) {
            if (_docTypeSystemId != null) {
                // System identifier must be specified to print DOCTYPE.
                // If public identifier is specified print 'PUBLIC
                // <public> <system>', if not, print 'SYSTEM <system>'.
1306 _printer.printText( "<!DOCTYPE " ); 1307 _printer.printText( name ); 1308 if (_docTypePublicId != null) { 1309 _printer.printText( " PUBLIC " ); 1310 printDoctypeURL( _docTypePublicId ); 1311 if (_indenting) { 1312 _printer.breakLine(); 1313 for (int i = 0 ; i < 18 + name.length() ; ++i) 1314 _printer.printText( " " ); 1315 } else 1316 _printer.printText( " " ); 1317 printDoctypeURL( _docTypeSystemId ); 1318 } else { 1319 _printer.printText( " SYSTEM " ); 1320 printDoctypeURL( _docTypeSystemId ); 1321 } 1322 1323 // If we accumulated any DTD contents while printing. 1324 // this would be the place to print it. 1325 if (dtd != null && dtd.length() > 0) { 1326 _printer.printText( " [" ); 1327 printText( dtd, true, true ); 1328 _printer.printText( ']' ); 1329 } 1330 1331 _printer.printText( ">" ); 1332 _printer.breakLine(); 1333 } else if (dtd != null && dtd.length() > 0) { 1334 _printer.printText( "<!DOCTYPE " ); 1335 _printer.printText( name ); 1336 _printer.printText( " [" ); 1337 printText( dtd, true, true ); 1338 _printer.printText( "]>" ); 1339 _printer.breakLine(); 1340 } 1341 } 1342 } 1343 1344 1345 /** 1346 * Must be called by a method about to print any type of content. 1347 * If the element was just opened, the opening tag is closed and 1348 * will be matched to a closing tag. Returns the current element 1349 * state with <tt>empty</tt> and <tt>afterElement</tt> set to false. 1350 * 1351 * @return The current element state 1352 * @throws IOException An I/O exception occurred while 1353 * serializing 1354 */ 1355 protected ElementState content() 1356 throws IOException 1357 { 1358 ElementState state; 1359 1360 state = getElementState(); 1361 if ( ! isDocumentState() ) { 1362 // Need to close CData section first 1363 if ( state.inCData && ! 
state.doCData ) { 1364 _printer.printText( "]]>" ); 1365 state.inCData = false; 1366 } 1367 // If this is the first content in the element, 1368 // change the state to not-empty and close the 1369 // opening element tag. 1370 if ( state.empty ) { 1371 _printer.printText( '>' ); 1372 state.empty = false; 1373 } 1374 // Except for one content type, all of them 1375 // are not last element. That one content 1376 // type will take care of itself. 1377 state.afterElement = false; 1378 // Except for one content type, all of them 1379 // are not last comment. That one content 1380 // type will take care of itself. 1381 state.afterComment = false; 1382 } 1383 return state; 1384 } 1385 1386 1387 /** 1388 * Called to print the text contents in the prevailing element format. 1389 * Since this method is capable of printing text as CDATA, it is used 1390 * for that purpose as well. White space handling is determined by the 1391 * current element state. In addition, the output format can dictate 1392 * whether the text is printed as CDATA or unescaped. 1393 * 1394 * @param text The text to print 1395 * @throws IOException An I/O exception occured while 1396 * serializing 1397 */ 1398 protected void characters( String text ) 1399 throws IOException 1400 { 1401 ElementState state; 1402 1403 state = content(); 1404 // Check if text should be print as CDATA section or unescaped 1405 // based on elements listed in the output format (the element 1406 // state) or whether we are inside a CDATA section or entity. 1407 1408 if ( state.inCData || state.doCData ) { 1409 // Print a CDATA section. The text is not escaped, but ']]>' 1410 // appearing in the code must be identified and dealt with. 1411 // The contents of a text node is considered space preserving. 1412 if ( ! 
state.inCData ) { 1413 _printer.printText("<![CDATA["); 1414 state.inCData = true; 1415 } 1416 int saveIndent = _printer.getNextIndent(); 1417 _printer.setNextIndent( 0 ); 1418 printCDATAText( text); 1419 _printer.setNextIndent( saveIndent ); 1420 1421 } else { 1422 1423 int saveIndent; 1424 1425 if ( state.preserveSpace ) { 1426 // If preserving space then hold of indentation so no 1427 // excessive spaces are printed at line breaks, escape 1428 // the text content without replacing spaces and print 1429 // the text breaking only at line breaks. 1430 saveIndent = _printer.getNextIndent(); 1431 _printer.setNextIndent( 0 ); 1432 printText( text, true, state.unescaped ); 1433 _printer.setNextIndent( saveIndent ); 1434 } else { 1435 printText( text, false, state.unescaped ); 1436 } 1437 } 1438 } 1439 1440 1441 /** 1442 * Returns the suitable entity reference for this character value, 1443 * or null if no such entity exists. Calling this method with <tt>'&'</tt> 1444 * will return <tt>"&amp;"</tt>. 1445 * 1446 * @param ch Character value 1447 * @return Character entity name, or null 1448 */ 1449 protected abstract String getEntityRef( int ch ); 1450 1451 1452 /** 1453 * Called to serializee the DOM element. The element is serialized based on 1454 * the serializer's method (XML, HTML, XHTML). 1455 * 1456 * @param elem The element to serialize 1457 * @throws IOException An I/O exception occured while 1458 * serializing 1459 */ 1460 protected abstract void serializeElement( Element elem ) 1461 throws IOException; 1462 1463 1464 /** 1465 * Comments and PIs cannot be serialized before the root element, 1466 * because the root element serializes the document type, which 1467 * generally comes first. Instead such PIs and comments are 1468 * accumulated inside a vector and serialized by calling this 1469 * method. Will be called when the root element is serialized 1470 * and when the document finished serializing. 
1471 * 1472 * @throws IOException An I/O exception occured while 1473 * serializing 1474 */ 1475 protected void serializePreRoot() 1476 throws IOException 1477 { 1478 int i; 1479 1480 if ( _preRoot != null ) { 1481 for ( i = 0 ; i < _preRoot.size() ; ++i ) { 1482 printText( (String) _preRoot.elementAt( i ), true, true ); 1483 if ( _indenting ) 1484 _printer.breakLine(); 1485 } 1486 _preRoot.removeAllElements(); 1487 } 1488 } 1489 1490 1491 //---------------------------------------------// 1492 // Text pretty printing and formatting methods // 1493 //---------------------------------------------// 1494 1495 protected void printCDATAText( String text ) throws IOException { 1496 int length = text.length(); 1497 char ch; 1498 1499 for ( int index = 0 ; index < length; ++index ) { 1500 ch = text.charAt( index ); 1501 if (ch == ']' 1502 && index + 2 < length 1503 && text.charAt(index + 1) == ']' 1504 && text.charAt(index + 2) == '>') { // check for ']]>' 1505 if (fDOMErrorHandler != null) { 1506 // REVISIT: this means that if DOM Error handler is not registered we don't report any 1507 // fatal errors and might serialize not wellformed document 1508 if ((features & DOMSerializerImpl.SPLITCDATA) == 0) { 1509 String msg = DOMMessageFormatter.formatMessage( 1510 DOMMessageFormatter.SERIALIZER_DOMAIN, 1511 "EndingCDATA", 1512 null); 1513 if ((features & DOMSerializerImpl.WELLFORMED) != 0) { 1514 // issue fatal error 1515 modifyDOMError(msg, DOMError.SEVERITY_FATAL_ERROR, "wf-invalid-character", fCurrentNode); 1516 fDOMErrorHandler.handleError(fDOMError); 1517 throw new LSException(LSException.SERIALIZE_ERR, msg); 1518 } 1519 // issue error 1520 modifyDOMError(msg, DOMError.SEVERITY_ERROR, "cdata-section-not-splitted", fCurrentNode); 1521 if (!fDOMErrorHandler.handleError(fDOMError)) { 1522 throw new LSException(LSException.SERIALIZE_ERR, msg); 1523 } 1524 } else { 1525 // issue warning 1526 String msg = 1527 DOMMessageFormatter.formatMessage( 1528 
DOMMessageFormatter.SERIALIZER_DOMAIN, 1529 "SplittingCDATA", 1530 null); 1531 modifyDOMError( 1532 msg, 1533 DOMError.SEVERITY_WARNING, 1534 null, fCurrentNode); 1535 fDOMErrorHandler.handleError(fDOMError); 1536 } 1537 } 1538 // split CDATA section 1539 _printer.printText("]]]]><![CDATA[>"); 1540 index += 2; 1541 continue; 1542 } 1543 1544 if (!XMLChar.isValid(ch)) { 1545 // check if it is surrogate 1546 if (++index <length) { 1547 surrogates(ch, text.charAt(index),true); 1548 } 1549 else { 1550 fatalError("The character '"+ch+"' is an invalid XML character"); 1551 } 1552 continue; 1553 } 1554 if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0x7F ) || 1555 ch == '\n' || ch == '\r' || ch == '\t' ) { 1556 _printer.printText(ch); 1557 } 1558 else { 1559 1560 // The character is not printable -- split CDATA section 1561 _printer.printText("]]>&#x"); 1562 _printer.printText(Integer.toHexString(ch)); 1563 _printer.printText(";<![CDATA["); 1564 } 1565 } 1566 } 1567 1568 1569 protected void surrogates(int high, int low, boolean inContent) throws IOException{ 1570 if (XMLChar.isHighSurrogate(high)) { 1571 if (!XMLChar.isLowSurrogate(low)) { 1572 //Invalid XML 1573 fatalError("The character '"+(char)low+"' is an invalid XML character"); 1574 } 1575 else { 1576 int supplemental = XMLChar.supplemental((char)high, (char)low); 1577 if (!XMLChar.isValid(supplemental)) { 1578 //Invalid XML 1579 fatalError("The character '"+(char)supplemental+"' is an invalid XML character"); 1580 } 1581 else { 1582 if (inContent && content().inCData) { 1583 _printer.printText("]]>&#x"); 1584 _printer.printText(Integer.toHexString(supplemental)); 1585 _printer.printText(";<![CDATA["); 1586 } 1587 else { 1588 printHex(supplemental); 1589 } 1590 } 1591 } 1592 } else { 1593 fatalError("The character '"+(char)high+"' is an invalid XML character"); 1594 } 1595 1596 } 1597 1598 /** 1599 * Called to print additional text with whitespace handling. 
1600 * If spaces are preserved, the text is printed as if by calling 1601 * {@link #printText(String,boolean,boolean)} with a call to {@link Printer#breakLine} 1602 * for each new line. If spaces are not preserved, the text is 1603 * broken at space boundaries if longer than the line width; 1604 * Multiple spaces are printed as such, but spaces at beginning 1605 * of line are removed. 1606 * 1607 * @param chars The text to print 1608 * @param start The start offset 1609 * @param length The number of characters 1610 * @param preserveSpace Space preserving flag 1611 * @param unescaped Print unescaped 1612 */ 1613 protected void printText( char[] chars, int start, int length, 1614 boolean preserveSpace, boolean unescaped ) 1615 throws IOException 1616 { 1617 1618 if ( preserveSpace ) { 1619 // Preserving spaces: the text must print exactly as it is, 1620 // without breaking when spaces appear in the text and without 1621 // consolidating spaces. If a line terminator is used, a line 1622 // break will occur. 1623 while ( length-- > 0 ) { 1624 char ch = chars[ start ]; 1625 ++start; 1626 if ( ch == '\n' || ch == '\r' || unescaped ) { 1627 _printer.printText( ch ); 1628 } 1629 else { 1630 printEscaped( ch ); 1631 } 1632 } 1633 } else { 1634 // Not preserving spaces: print one part at a time, and 1635 // use spaces between parts to break them into different 1636 // lines. Spaces at beginning of line will be stripped 1637 // by printing mechanism. Line terminator is treated 1638 // no different than other text part. 
1639 while ( length-- > 0 ) { 1640 char ch = chars[ start ]; 1641 ++start; 1642 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) { 1643 _printer.printSpace(); 1644 } 1645 else if ( unescaped ) { 1646 _printer.printText( ch ); 1647 } 1648 else { 1649 printEscaped( ch ); 1650 } 1651 } 1652 } 1653 } 1654 1655 1656 protected void printText( String text, boolean preserveSpace, boolean unescaped ) 1657 throws IOException 1658 { 1659 int index; 1660 char ch; 1661 1662 if ( preserveSpace ) { 1663 // Preserving spaces: the text must print exactly as it is, 1664 // without breaking when spaces appear in the text and without 1665 // consolidating spaces. If a line terminator is used, a line 1666 // break will occur. 1667 for ( index = 0 ; index < text.length() ; ++index ) { 1668 ch = text.charAt( index ); 1669 if ( ch == '\n' || ch == '\r' || unescaped ) 1670 _printer.printText( ch ); 1671 else 1672 printEscaped( ch ); 1673 } 1674 } else { 1675 // Not preserving spaces: print one part at a time, and 1676 // use spaces between parts to break them into different 1677 // lines. Spaces at beginning of line will be stripped 1678 // by printing mechanism. Line terminator is treated 1679 // no different than other text part. 1680 for ( index = 0 ; index < text.length() ; ++index ) { 1681 ch = text.charAt( index ); 1682 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) { 1683 _printer.printSpace(); 1684 } 1685 else if ( unescaped ) { 1686 _printer.printText( ch ); 1687 } 1688 else { 1689 printEscaped( ch ); 1690 } 1691 } 1692 } 1693 } 1694 1695 1696 /** 1697 * Print a document type public or system identifier URL. 1698 * Encapsulates the URL in double quotes, escapes non-printing 1699 * characters and print it equivalent to {@link #printText}. 
1700 * 1701 * @param url The document type url to print 1702 */ 1703 protected void printDoctypeURL( String url ) 1704 throws IOException 1705 { 1706 int i; 1707 1708 _printer.printText( '"' ); 1709 for( i = 0 ; i < url.length() ; ++i ) { 1710 if ( url.charAt( i ) == '"' || url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) { 1711 _printer.printText( '%' ); 1712 _printer.printText( Integer.toHexString( url.charAt( i ) ) ); 1713 } else 1714 _printer.printText( url.charAt( i ) ); 1715 } 1716 _printer.printText( '"' ); 1717 } 1718 1719 1720 protected void printEscaped( int ch ) 1721 throws IOException 1722 { 1723 String charRef; 1724 // If there is a suitable entity reference for this 1725 // character, print it. The list of available entity 1726 // references is almost but not identical between 1727 // XML and HTML. 1728 charRef = getEntityRef( ch ); 1729 if ( charRef != null ) { 1730 _printer.printText( '&' ); 1731 _printer.printText( charRef ); 1732 _printer.printText( ';' ); 1733 } else if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0x7F ) || 1734 ch == '\n' || ch == '\r' || ch == '\t' ) { 1735 // Non printables are below ASCII space but not tab or line 1736 // terminator, ASCII delete, or above a certain Unicode threshold. 1737 if (ch < 0x10000) { 1738 _printer.printText((char)ch ); 1739 } else { 1740 _printer.printText((char)(((ch-0x10000)>>10)+0xd800)); 1741 _printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00)); 1742 } 1743 } else { 1744 printHex(ch); 1745 } 1746 } 1747 1748 /** 1749 * Escapes chars 1750 */ 1751 final void printHex( int ch) throws IOException { 1752 _printer.printText( "&#x" ); 1753 _printer.printText(Integer.toHexString(ch)); 1754 _printer.printText( ';' ); 1755 1756 } 1757 1758 1759 /** 1760 * Escapes a string so it may be printed as text content or attribute 1761 * value. Non printable characters are escaped using character references. 
1762 * Where the format specifies a deault entity reference, that reference 1763 * is used (e.g. <tt>&lt;</tt>). 1764 * 1765 * @param source The string to escape 1766 */ 1767 protected void printEscaped( String source ) 1768 throws IOException 1769 { 1770 for ( int i = 0 ; i < source.length() ; ++i ) { 1771 int ch = source.charAt(i); 1772 if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) { 1773 int lowch = source.charAt(i+1); 1774 if ((lowch & 0xfc00) == 0xdc00) { 1775 ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00; 1776 i++; 1777 } 1778 } 1779 printEscaped(ch); 1780 } 1781 } 1782 1783 1784 //--------------------------------// 1785 // Element state handling methods // 1786 //--------------------------------// 1787 1788 1789 /** 1790 * Return the state of the current element. 1791 * 1792 * @return Current element state 1793 */ 1794 protected ElementState getElementState() 1795 { 1796 return _elementStates[ _elementStateCount ]; 1797 } 1798 1799 1800 /** 1801 * Enter a new element state for the specified element. 1802 * Tag name and space preserving is specified, element 1803 * state is initially empty. 1804 * 1805 * @return Current element state, or null 1806 */ 1807 protected ElementState enterElementState( String namespaceURI, String localName, 1808 String rawName, boolean preserveSpace ) 1809 { 1810 ElementState state; 1811 1812 if ( _elementStateCount + 1 == _elementStates.length ) { 1813 ElementState[] newStates; 1814 1815 // Need to create a larger array of states. This does not happen 1816 // often, unless the document is really deep. 
1817 newStates = new ElementState[ _elementStates.length + 10 ]; 1818 for ( int i = 0 ; i < _elementStates.length ; ++i ) 1819 newStates[ i ] = _elementStates[ i ]; 1820 for ( int i = _elementStates.length ; i < newStates.length ; ++i ) 1821 newStates[ i ] = new ElementState(); 1822 _elementStates = newStates; 1823 } 1824 1825 ++_elementStateCount; 1826 state = _elementStates[ _elementStateCount ]; 1827 state.namespaceURI = namespaceURI; 1828 state.localName = localName; 1829 state.rawName = rawName; 1830 state.preserveSpace = preserveSpace; 1831 state.empty = true; 1832 state.afterElement = false; 1833 state.afterComment = false; 1834 state.doCData = state.inCData = false; 1835 state.unescaped = false; 1836 state.prefixes = _prefixes; 1837 1838 _prefixes = null; 1839 return state; 1840 } 1841 1842 1843 /** 1844 * Leave the current element state and return to the 1845 * state of the parent element. If this was the root 1846 * element, return to the state of the document. 1847 * 1848 * @return Previous element state 1849 */ 1850 protected ElementState leaveElementState() 1851 { 1852 if ( _elementStateCount > 0 ) { 1853 /*Corrected by David Blondeau (blondeau@intalio.com)*/ 1854 _prefixes = null; 1855 //_prefixes = _elementStates[ _elementStateCount ].prefixes; 1856 -- _elementStateCount; 1857 return _elementStates[ _elementStateCount ]; 1858 } 1859 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "Internal", null); 1860 throw new IllegalStateException(msg); 1861 } 1862 1863 1864 /** 1865 * Returns true if in the state of the document. 1866 * Returns true before entering any element and after 1867 * leaving the root element. 1868 * 1869 * @return True if in the state of the document 1870 */ 1871 protected boolean isDocumentState() { 1872 return _elementStateCount == 0; 1873 } 1874 1875 /** Clears document state. 
**/
    final void clearDocumentState() {
        // Depth zero is the document state (see isDocumentState()).
        _elementStateCount = 0;
    }

    /**
     * Returns the namespace prefix for the specified URI.
     * If the URI has been mapped to a prefix, returns the
     * prefix, otherwise returns null. The pending prefix map is
     * consulted first, then the element states from the current
     * one outwards.
     *
     * @param namespaceURI The namespace URI
     * @return The namespace prefix if known, or null
     */
    protected String getPrefix( String namespaceURI )
    {
        if ( _prefixes != null ) {
            final String pending = _prefixes.get( namespaceURI );
            if ( pending != null )
                return pending;
        }

        // Index 0 is the document state and is never searched.
        int depth = _elementStateCount;
        while ( depth > 0 ) {
            final ElementState scope = _elementStates[ depth ];
            if ( scope.prefixes != null ) {
                final String declared = (String) scope.prefixes.get( namespaceURI );
                if ( declared != null )
                    return declared;
            }
            --depth;
        }
        return null;
    }

    /**
     * The method modifies global DOM error object: it is reset and
     * then filled with the given values and a fresh locator that
     * refers to the given node.
     *
     * @param message The error message
     * @param severity The error severity
     * @param type The error type, may be null
     * @param node The node the locator refers to
     * @return a DOMError
     */
    protected DOMError modifyDOMError(String message, short severity, String type, Node node){
            fDOMError.reset();
            fDOMError.fLocator = new DOMLocatorImpl(-1, -1, -1, node, null);
            fDOMError.fSeverity = severity;
            fDOMError.fType = type;
            fDOMError.fMessage = message;
            return fDOMError;
    }


    /**
     * Reports a fatal error. With a registered DOM error handler the
     * error is handed to the handler and this method returns normally;
     * without one an exception is thrown.
     *
     * @param message The error message
     * @throws IOException If no DOM error handler is registered
     */
    protected void fatalError(String message) throws IOException{
        if ( fDOMErrorHandler == null ) {
            throw new IOException(message);
        }
        modifyDOMError(message, DOMError.SEVERITY_FATAL_ERROR, null, fCurrentNode);
        fDOMErrorHandler.handleError(fDOMError);
    }

    /**
     * DOM level 3:
     * Check a node to determine if it contains unbound namespace prefixes.
     * This implementation performs no check.
     *
     * @param node The node to check for unbound namespace prefixes
     */
    protected void checkUnboundNamespacePrefixedNode (Node node) throws IOException{
        // intentionally empty
    }
}